diff --git a/.gitignore b/.gitignore index 5d7dea7f..3c628ab0 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ .Ruserdata inst/doc Tplyr.Rproj +docs/ diff --git a/DESCRIPTION b/DESCRIPTION index 53a1f42f..2a4ea779 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Tplyr -Title: A Grammar of Clinical Data Summary -Version: 0.4.4 +Title: A Traceability Focused Grammar of Clinical Data Summary +Version: 1.0.0 Authors@R: c( person(given = "Eli", @@ -25,7 +25,7 @@ Authors@R: person(given = "Atorus Research LLC", role = "cph") ) -Description: A tool created to simplify the data manipulation necessary to create clinical reports. +Description: A traceability focused tool created to simplify the data manipulation necessary to create clinical summaries. License: MIT + file LICENSE URL: https://github.com/atorus-research/Tplyr BugReports: https://github.com/atorus-research/Tplyr/issues @@ -55,6 +55,6 @@ Suggests: pharmaRTF, withr VignetteBuilder: knitr -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.1 RdMacros: lifecycle Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 61d2ce6e..87428457 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,10 @@ # Generated by roxygen2: do not edit by hand S3method(build,tplyr_table) +S3method(get_meta_result,data.frame) +S3method(get_meta_result,tplyr_table) +S3method(get_meta_subset,data.frame) +S3method(get_meta_subset,tplyr_table) S3method(get_numeric_data,tplyr_layer) S3method(get_numeric_data,tplyr_table) S3method(get_stats_data,tplyr_layer) @@ -9,15 +13,23 @@ S3method(get_where,tplyr_layer) S3method(get_where,tplyr_table) S3method(print,f_str) S3method(print,tplyr_layer) +S3method(print,tplyr_layer_template) +S3method(print,tplyr_meta) S3method(print,tplyr_table) S3method(process_formatting,count_layer) S3method(process_formatting,desc_layer) S3method(process_formatting,shift_layer) +S3method(process_metadata,count_layer) +S3method(process_metadata,desc_layer) +S3method(process_metadata,shift_layer) +S3method(process_metadata,tplyr_riskdiff) S3method(process_statistic_data,tplyr_riskdiff) S3method(process_statistic_formatting,tplyr_riskdiff) S3method(process_summaries,count_layer) S3method(process_summaries,desc_layer) S3method(process_summaries,shift_layer) +S3method(set_denoms_by,count_layer) +S3method(set_denoms_by,shift_layer) S3method(set_format_strings,count_layer) S3method(set_format_strings,desc_layer) S3method(set_where,tplyr_layer) @@ -25,21 +37,31 @@ S3method(set_where,tplyr_table) S3method(str,f_str) S3method(str,tplyr_layer) S3method(str,tplyr_table) +export("%>%") export("header_n<-") export("pop_data<-") export(add_column_headers) +export(add_filters) export(add_layer) export(add_layers) export(add_risk_diff) export(add_total_group) export(add_total_row) export(add_treat_grps) +export(add_variables) +export(append_metadata) +export(apply_formats) export(apply_row_masks) export(build) export(f_str) export(get_by) export(get_count_layer_formats) export(get_desc_layer_formats) +export(get_layer_template) +export(get_layer_templates) +export(get_meta_result) +export(get_meta_subset) +export(get_metadata) export(get_numeric_data) export(get_pop_where) export(get_precision_by) @@ -53,12 +75,15 @@ export(group_desc) export(group_shift) export(header_n) export(keep_levels) +export(new_layer_template) export(pop_data) export(pop_treat_var) export(process_formatting) +export(process_metadata) export(process_statistic_data) export(process_statistic_formatting) export(process_summaries) +export(remove_layer_template) 
export(set_by) export(set_count_layer_formats) export(set_custom_summaries) @@ -72,6 +97,7 @@ export(set_header_n) export(set_indentation) export(set_missing_count) export(set_nest_count) +export(set_numeric_threshold) export(set_order_count_method) export(set_ordering_cols) export(set_outer_sort_position) @@ -79,32 +105,41 @@ export(set_pop_data) export(set_pop_treat_var) export(set_pop_where) export(set_precision_by) +export(set_precision_data) export(set_precision_on) export(set_result_order_var) export(set_shift_layer_formats) +export(set_stats_as_columns) export(set_target_var) export(set_total_row_label) export(set_treat_var) export(set_where) +export(str_indent_wrap) export(tplyr_layer) +export(tplyr_meta) export(tplyr_table) export(treat_grps) export(treat_var) +export(use_template) importFrom(assertthat,assert_that) importFrom(dplyr,across) importFrom(dplyr,add_tally) +importFrom(dplyr,anti_join) importFrom(dplyr,arrange) importFrom(dplyr,as_tibble) importFrom(dplyr,between) importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) +importFrom(dplyr,cur_group) importFrom(dplyr,distinct) importFrom(dplyr,do) importFrom(dplyr,everything) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) +importFrom(dplyr,group_keys) +importFrom(dplyr,if_else) importFrom(dplyr,lag) importFrom(dplyr,left_join) importFrom(dplyr,mutate) @@ -112,6 +147,8 @@ importFrom(dplyr,mutate_all) importFrom(dplyr,mutate_at) importFrom(dplyr,mutate_if) importFrom(dplyr,n) +importFrom(dplyr,n_distinct) +importFrom(dplyr,pull) importFrom(dplyr,rename) importFrom(dplyr,row_number) importFrom(dplyr,rowwise) @@ -128,6 +165,7 @@ importFrom(lifecycle,deprecate_soft) importFrom(lifecycle,deprecate_stop) importFrom(magrittr,"%>%") importFrom(magrittr,extract) +importFrom(magrittr,extract2) importFrom(purrr,flatten) importFrom(purrr,imap) importFrom(purrr,map) @@ -178,12 +216,14 @@ importFrom(rlang,is_quosure) importFrom(rlang,is_quosures) importFrom(rlang,quo) importFrom(rlang,quo_get_expr) +importFrom(rlang,quo_is_call) importFrom(rlang,quo_is_missing) importFrom(rlang,quo_is_null) importFrom(rlang,quo_is_symbol) importFrom(rlang,quo_name) importFrom(rlang,quos) importFrom(rlang,sym) +importFrom(rlang,syms) importFrom(rlang,trace_back) importFrom(rlang,warn) importFrom(stats,IQR) @@ -198,6 +238,7 @@ importFrom(stringr,str_detect) importFrom(stringr,str_extract) importFrom(stringr,str_extract_all) importFrom(stringr,str_locate_all) +importFrom(stringr,str_match_all) importFrom(stringr,str_pad) importFrom(stringr,str_remove_all) importFrom(stringr,str_replace) @@ -206,6 +247,7 @@ importFrom(stringr,str_split) importFrom(stringr,str_starts) importFrom(stringr,str_sub) importFrom(stringr,str_trim) +importFrom(stringr,str_wrap) importFrom(tibble,add_column) importFrom(tibble,rownames_to_column) importFrom(tibble,tibble) @@ -218,6 +260,7 @@ importFrom(tidyr,starts_with) importFrom(tidyselect,all_of) importFrom(tidyselect,any_of) importFrom(tidyselect,vars_select) +importFrom(utils,capture.output) importFrom(utils,head) importFrom(utils,str) importFrom(utils,tail) diff --git a/NEWS.md b/NEWS.md index 08287911..5c592c88 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,20 @@ +# Tplyr 1.0.0 + +- New features + - Introduction of traceability metadata framework #32 + - Framework for creating re-usable layer templates #66 + - Native pipe compatibility #33 + - Automatically load magrittr pipe #22 + - Refactor of count layer programming #28 + - Allow external precision data for desc 
layers #27 + - Allow denominators within count layers as formattable values #11 + - Descriptive statistics layers allow stats as columns #37 + - New experimental function `set_numeric_threshold()` + - Apply f_str's outside of a Tplyr table with new function `apply_formats()` #57 + - New post-processing helper function `str_indent_wrap()` for hyphen-enabled string wrapping #59 +- Bug fixes + - Fix errors in partially provided precision caps #20 + # Tplyr 0.4.4 - Added new functionality per issue #10. Adds 'Both' as an option for sorting outer layers of nested count. diff --git a/R/apply_formats.R b/R/apply_formats.R new file mode 100644 index 00000000..38f9b5eb --- /dev/null +++ b/R/apply_formats.R @@ -0,0 +1,63 @@ +#' Apply Format Strings outside of a Tplyr table +#' +#' The `f_str` object in Tplyr is used to drive formatting of the output +#' strings within a Tplyr table. This function allows a user to use the same +#' interface to apply formatted strings to any data frame within a +#' `dplyr::mutate()` context. +#' +#' Note that auto-precision is not currently supported within `apply_formats()`. +#' +#' @param format_string The desired display format. X's indicate digits. On the +#' left, the number of x's indicates the integer length. On the right, the +#' number of x's controls decimal precision and rounding. Variables are +#' inferred by any separation of the 'x' values other than a decimal. +#' @param ... The variables to be formatted using the format specified in +#' \code{format_string}. These must be numeric variables. +#' @param empty The string to display when the numeric data is not available. +#' Use a single element character vector, with the element named '.overall' to +#' instead replace the whole string. +#' +#' @return Character vector of formatted values +#' @md +#' @export +#' +#' @examples +#' +#' library(dplyr) +#' +#' mtcars %>% +#' head() %>% +#' mutate( +#' fmt_example = apply_formats('xxx (xx.x)', hp, wt) +#' ) +apply_formats <- function(format_string, ..., empty = c(.overall = "")) { + format <- f_str(format_string, ..., empty=empty) + + if (format$auto_precision) { + stop('Auto-precision is not currently supported within the `apply_formats()` context', + call.=FALSE) + } + + pmap_chr(list(...), function(...) apply_fmts(...), fmt=format) +} + +#' Application of individual format string +#' +#' This is what's used internally by the vectorized apply_formats +#' +#' @param ... The variables to be formatted using the format specified in +#' the `f_str` object +#' @param fmt An f_str object +#' @md +#' +#' @return An individually formatted string +#' @noRd +apply_fmts <- function(..., fmt) { + nums <- list(...)
+ repl <- vector('list', length(fmt$settings)) + for (i in seq_along(fmt$settings)) { + repl[[i]] <- num_fmt(nums[[i]], i, fmt=fmt) + } + args <- append(list(fmt$repl_str), repl) + do.call('sprintf', args) +} diff --git a/R/assertions.R b/R/assertions.R index 2f5884e2..ba108032 100644 --- a/R/assertions.R +++ b/R/assertions.R @@ -138,7 +138,7 @@ assert_quo_var_present <- function(quo_list, vnames=NULL, envir=NULL, allow_char # Check each element of the `quo_list` list for (v in quo_list) { - if (class(quo_get_expr(v)) == "name") { + if (inherits(quo_get_expr(v), "name")) { vname <- as_name(quo_get_expr(v)) assert_that(vname %in% vnames, msg = paste0("`", param, "` variable `",vname, "` does not exist in target dataset")) @@ -203,7 +203,7 @@ unpack_vars <- function(quo_list, allow_character=TRUE) { #' #' @noRd is_null_or_call <- function(quo_var) { - quo_is_null(quo_var) || class(quo_get_expr(quo_var)) == "call" + quo_is_null(quo_var) || inherits(quo_get_expr(quo_var), "call") } #' Check if a quosure is null or contains a logical value diff --git a/R/build.R b/R/build.R index 2381387f..6b2250f3 100644 --- a/R/build.R +++ b/R/build.R @@ -2,22 +2,33 @@ #' Trigger the execution of the \code{tplyr_table} #' -#' @description -#' The functions used to assemble a \code{tplyr_table} object and each of the layers do not trigger the processing of any data. Rather, a lazy -#' execution style is used to allow you to contruct your table and then explicitly state when the data processing should happen. -#' \code{build} triggers this event. +#' @description The functions used to assemble a \code{tplyr_table} object and +#' each of the layers do not trigger the processing of any data. Rather, a lazy +#' execution style is used to allow you to construct your table and then +#' explicitly state when the data processing should happen. \code{build} +#' triggers this event. #' -#' @details -#' When the \code{build} command is executed, all of the data processing commences. Any preprocessing necessary within the table environment -#' takes place first. Next, each of the layers begins executing. Once the layers complete executing, the output of each layer is stacked -#' into the resulting data frame. +#' @details When the \code{build} command is executed, all of the data +#' processing commences. Any pre-processing necessary within the table +#' environment takes place first. Next, each of the layers begins executing. +#' Once the layers complete executing, the output of each layer is stacked into +#' the resulting data frame. #' -#' Once this process is complete, any post-processing necessary within the table environment takes place, and the final output can be -#' delivered. Metadata and traceability information are kept within each of the layer environments, which allows an investigation into the -#' source of the resulting datapoints. For example, numeric data from any summaries performed is maintained and accessible within -#' a layer using \code{\link{get_numeric_data}}. +#' Once this process is complete, any post-processing necessary within the table +#' environment takes place, and the final output can be delivered. Metadata and +#' traceability information are kept within each of the layer environments, +#' which allows an investigation into the source of the resulting datapoints. +#' For example, numeric data from any summaries performed is maintained and +#' accessible within a layer using \code{\link{get_numeric_data}}. 
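# ------------------------------------------------------------------------------
# Illustrative aside (not part of the diff): a minimal sketch of the traceability
# metadata workflow introduced in this release, based on the new build() signature
# in this changeset and the newly exported get_metadata()/get_meta_result()/
# get_meta_subset() functions in the NAMESPACE. The commented lookup calls and
# their row_id/column values are assumptions about the final API, not confirmed
# by this diff.
library(Tplyr) # the magrittr pipe is now re-exported by Tplyr itself

t <- tplyr_table(mtcars, gear) %>%
  add_layer(
    group_count(cyl)
  )

# Building with metadata = TRUE keeps a row_id column on the output and binds a
# metadata data frame to the table environment
dat <- build(t, metadata = TRUE)
meta <- get_metadata(t)

# Hypothetical per-cell lookups (argument names assumed):
# get_meta_result(t, row_id = "c1_1", column = "var1_4")
# get_meta_subset(t, row_id = "c1_1", column = "var1_4")
# ------------------------------------------------------------------------------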
+#' +#' The `metadata` option of build will trigger the construction of traceability +#' metadata for the constructed data frame. Essentially, for every "result" that +#' Tplyr produces, Tplyr can also generate the steps necessary to obtain the +#' source data which produced that result from the input. For more information, +#' see vignette("metadata"). #' #' @param x A \code{tplyr_table} object +#' @param metadata Trigger to build metadata. Defaults to FALSE #' #' @return An executed \code{tplyr_table} #' @export @@ -36,15 +47,14 @@ #' build() #' #' @seealso tplyr_table, tplyr_layer, add_layer, add_layers, layer_constructors -build <- function(x) { - +build <- function(x, metadata=FALSE) { UseMethod("build") } #' tplyr_table S3 method #' @noRd #' @export -build.tplyr_table <- function(x) { +build.tplyr_table <- function(x, metadata=FALSE) { op <- options() @@ -71,6 +81,36 @@ build.tplyr_table <- function(x) { ungroup() %>% select(starts_with('row_label'), starts_with('var'), "ord_layer_index", everything()) + # Process metadata if triggered + if (metadata) { + metadata_list <- purrr::map(x$layers, process_metadata) + + # Prepare metadata like the output + metadata <- metadata_list %>% + map2_dfr(seq_along(metadata_list), add_layer_index) %>% + ungroup() %>% + mutate( + row_id = paste0(row_id, '_', ord_layer_index) + ) %>% + select(row_id, starts_with('row_label'), starts_with('var'), everything(), -starts_with('ord')) + + # Finish off the row_id with the layer indicator and put row_id up front + output <- output %>% + mutate( + row_id = paste0(row_id, '_', ord_layer_index) + ) %>% + select(row_id, everything()) + + # Write the metadata to the environment + env_bind(x, metadata=metadata) + } else { + # Drop row_id if metadata isn't built + output <- output %>% + select(-row_id) + } + + + }, finally = { # Set options back to defaults options(op) @@ -98,7 +138,7 @@ process_summaries <- function(x, ...) { #' @param x A tplyr_layer object #' @param ... arguments passed to dispatch #' -#' @return The formatted_table object that is binded to the layer +#' @return The formatted_table object that is bound to the layer #' @export #' @keywords internal process_formatting <- function(x, ...) { @@ -110,6 +150,19 @@ prepare_format_metadata <- function(x) { UseMethod("prepare_format_metadata") } +#' Process layers to get metadata tables +#' +#' This is an internal method, but is exported to support S3 dispatch. Not intended for direct use by a user. +#' @param x A tplyr_layer object +#' @param ... arguments passed to dispatch +#' +#' @return The formatted_meta object that is bound to the layer +#' @export +#' @keywords internal +process_metadata <- function(x, ...) { + UseMethod("process_metadata") +} + #' Fetch table formatting info from layers #' #' @param x A tplyr_table object diff --git a/R/count.R b/R/count.R index 2b4a8b76..1368d81f 100644 --- a/R/count.R +++ b/R/count.R @@ -3,7 +3,7 @@ #' @export process_summaries.count_layer <- function(x, ...) { - if(env_get(x, "is_built_nest", default = FALSE)) { + if (env_get(x, "is_built_nest", default = FALSE)) { refresh_nest(x) } @@ -14,10 +14,10 @@ process_summaries.count_layer <- function(x, ...) 
{ # Check 'kept_levels' and stop if they're not in the target dataset #Logic to check for keep_levels # If this is not a built nest - if(!("tplyr_layer" %in% class(env_parent()))) { + if (!("tplyr_layer" %in% class(env_parent()))) { keep_levels_logic <- expr(!is.null(levels_to_keep)) # If this is a built nest and we're beginning to process - }else if("tplyr_layer" %in% class(env_parent()) && length(target_var) == 2){ + } else if ("tplyr_layer" %in% class(env_parent()) && length(target_var) == 2) { keep_levels_logic <- expr(!is.null(levels_to_keep) && quo_is_symbol(target_var[[1]])) # If this is a built nest and we are processing the "sub" layers } else { @@ -25,8 +25,8 @@ process_summaries.count_layer <- function(x, ...) { } # Check that all values in 'keep levels' are present in the data - if(eval_tidy(keep_levels_logic)) { - if(is.factor(target[[as_name(tail(target_var, 1)[[1]])]])){ + if (eval_tidy(keep_levels_logic)) { + if (is.factor(target[[as_name(tail(target_var, 1)[[1]])]])) { target_levels <- levels(target[[as_name(tail(target_var, 1)[[1]])]]) } else { target_levels <- unique(target[[as_name(tail(target_var, 1)[[1]])]]) @@ -40,16 +40,14 @@ process_summaries.count_layer <- function(x, ...) { } # Save this for the denominator where, but only if it hasn't been saved yet. - if(is.null(built_target_pre_where)) built_target_pre_where <- built_target - - + if (is.null(built_target_pre_where)) built_target_pre_where <- built_target built_target <- built_target %>% filter(!!where) %>% filter(!!kept_levels) ## Drop levels if target var is factor and kept levels used - if(eval_tidy(keep_levels_logic) && + if (eval_tidy(keep_levels_logic) && is.factor(built_target[[as_name(tail(target_var, 1)[[1]])]])) { # Pull out the levels that weren't in keep levels. target_levels <- levels(built_target[[as_name(tail(target_var, 1)[[1]])]]) @@ -66,14 +64,14 @@ process_summaries.count_layer <- function(x, ...) { "` is invalid. Filter error:\n", e)) }) - if(!quo_is_symbol(target_var[[1]]) && as_name(target_var[[1]]) %in% names(target)) { + if (!quo_is_symbol(target_var[[1]]) && as_name(target_var[[1]]) %in% names(target)) { warning(paste0("The first target variable has been coerced into a symbol.", " You should pass variable names unquoted."), immediate. = TRUE) target_var[[1]] <- quo(!!sym(as_name(target_var[[1]]))) } - if(length(target_var) == 2 && !quo_is_symbol(target_var[[2]]) && + if (length(target_var) == 2 && !quo_is_symbol(target_var[[2]]) && as_name(target_var[[2]]) %in% names(target)) { warning(paste0("The second target variable has been coerced into a symbol.", "You should pass variable names unquoted."), immediate. = TRUE) @@ -86,9 +84,9 @@ process_summaries.count_layer <- function(x, ...) { rename_missing_values(x) # Preprocessing in the case of two target_variables - if(length(env_get(x, "target_var")) > 2) abort("Only up too two target_variables can be used in a count_layer") + if (length(env_get(x, "target_var")) > 2) abort("Only up to two target_variables can be used in a count_layer") - else if(length(env_get(x, "target_var")) == 2) { + else if (length(env_get(x, "target_var")) == 2) { # Change treat_var to factor so all combinations appear in nest factor_treat_var(x) @@ -129,23 +127,18 @@ process_summaries.count_layer <- function(x, ...)
{ process_single_count_target <- function(x) { evalq({ - if(is.null(include_total_row)) include_total_row <- FALSE - if(is.null(total_row_label)) total_row_label <- "Total" + if (is.null(include_total_row)) include_total_row <- FALSE + if (is.null(total_row_label)) total_row_label <- "Total" # The current environment should be the layer itself process_count_n(current_env()) - if(!is.null(distinct_by)) process_count_distinct_n(current_env()) - - if(include_total_row){ + if (include_total_row) { process_count_total_row(current_env()) - if(!is.null(distinct_by)) { - process_count_distinct_total_row(current_env()) - } # Used to temporarily check formats - if(is.null(format_strings)) tmp_fmt <- gather_defaults.count_layer(current_env()) - if(count_missings && !(is.null(denom_ignore) || length(denom_ignore) == 0) && + if (is.null(format_strings)) tmp_fmt <- gather_defaults.count_layer(current_env()) + if (count_missings && !(is.null(denom_ignore) || length(denom_ignore) == 0) && (("pct" %in% total_count_format$vars || "distinct_pct" %in% total_count_format$vars) || # Logic if no total_count format (is.null(total_count_format) && is.null(format_strings) && ("pct" %in% tmp_fmt$n_counts$vars || "distinct_pct" %in% tmp_fmt$n_counts$vars)) || @@ -157,110 +150,32 @@ process_single_count_target <- function(x) { } } - if(is.null(count_row_prefix)) count_row_prefix <- "" + if (is.null(count_row_prefix)) count_row_prefix <- "" - if(is.null(denoms_by)) denoms_by <- c(treat_var, cols) + if (is.null(denoms_by)) denoms_by <- c(treat_var, cols) # rbind tables together numeric_data <- summary_stat %>% bind_rows(total_stat) %>% rename("summary_var" = !!target_var[[1]]) %>% group_by(!!!denoms_by) %>% - do(get_denom_total(., denoms_by, denoms_df, denoms_distinct_df, "n")) %>% + do(get_denom_total(., denoms_by, denoms_df, "n")) %>% mutate(summary_var = prefix_count_row(summary_var, count_row_prefix)) %>% ungroup() - if(!is.null(distinct_stat)) { - if(include_total_row) { + if (!is.null(distinct_stat)) { + if (include_total_row) { distinct_stat <- distinct_stat %>% bind_rows(total_stat_denom) %>% group_by(!!!denoms_by) %>% - do(get_denom_total(., denoms_by, denoms_df, denoms_distinct_df, "distinct_n")) + do(get_denom_total(., denoms_by, denoms_df, "distinct_n")) } numeric_data <- bind_cols(numeric_data, distinct_stat[, c("distinct_n", "distinct_total")]) } - }, envir = x) -} - -#' @noRd -process_nested_count_target <- function(x) { - - evalq({ - - if(is.null(indentation)) indentation <- " " - - - - assert_that(quo_is_symbol(target_var[[2]]), - msg = "Inner layers must be data driven variables") - - first_layer <- process_summaries(group_count(current_env(), target_var = !!target_var[[1]], - by = vars(!!!by), where = !!where)) - - second_layer <- process_summaries(group_count(current_env(), target_var = !!target_var[[2]], - by = vars(!!target_var[[1]], !!!by), where = !!where) %>% - set_count_row_prefix(indentation)) - - first_layer_final <- first_layer$numeric_data - # add_column(!!target_var[[1]] := .[["summary_var"]]) - - second_layer_final <- second_layer$numeric_data %>% - group_by(!!target_var[[1]]) %>% - do(filter_nested_inner_layer(., target, target_var[[1]], target_var[[2]], indentation)) - - # Bind the numeric data together - numeric_data <- bind_rows(first_layer_final, second_layer_final) - - # Save the original by and target_vars incase the layer is rebuilt - by_saved <- by - target_var_saved <- target_var - is_built_nest <- TRUE - - by <- vars(!!target_var[[1]], !!!by) - target_var <- 
vars(!!target_var[[2]]) - }, envir = x) - -} - -#' This function resets the variables for a nested layer after it was built -#' @noRd -refresh_nest <- function(x) { - env_bind(x, by = env_get(x, "by_saved")) - env_bind(x, target_var = env_get(x, "target_var_saved")) -} - -#' This function is meant to remove the values of an inner layer that don't -#' appear in the target data -#' @noRd -filter_nested_inner_layer <- function(.group, target, outer_name, inner_name, indentation) { - - # Is outer variable text? If it is don't filter on it - text_outer <- !quo_is_symbol(outer_name) - outer_name <- as_name(outer_name) - inner_name <- as_name(inner_name) - - if(text_outer) { - target_inner_values <- target %>% - select(any_of(inner_name)) %>% - unlist() %>% - paste0(indentation, .) - } else { - current_outer_value <- unique(.group[, outer_name])[[1]] - - target_inner_values <- target %>% - filter(!!sym(outer_name) == current_outer_value) %>% - select(any_of(inner_name)) %>% - unlist() %>% - paste0(indentation, .) - } - - .group %>% - filter(summary_var %in% target_inner_values) - } #' Process the n count data and put into summary_stat @@ -271,15 +186,33 @@ process_count_n <- function(x) { evalq({ + if (is.null(denoms_by)) denoms_by <- c(treat_var, cols) + denoms_by_ <- map(denoms_by, function(x) { + if (as_name(x) == "summary_var") quo(!!target_var[[1]]) + else x + }) + summary_stat <- built_target %>% + mutate( + across( + .cols = any_of(map_chr(c(denoms_by, target_var, by), ~as_name(.))), + .fns = function(x) if (is.factor(x)) x else as.factor(x) + ) + ) %>% # Group by variables including target variables and count them group_by(!!treat_var, !!!by, !!!target_var, !!!cols) %>% - tally(name = "n") %>% - mutate(n = as.double(n)) %>% + summarize( + n = n(), + distinct_n = n_distinct(!!!distinct_by, !!treat_var, !!!target_var) + ) %>% + mutate( + n = as.double(n), + distinct_n = as.double(distinct_n) + ) %>% ungroup() # If there is a missing_count_string, but its not in the dataset - if(!is.null(missing_count_string) && + if (!is.null(missing_count_string) && !((any(unname(unlist(missing_count_list)) %in% unique(built_target[, as_name(target_var[[1]])]))) || any(is.na(built_target[, as_name(target_var[[1]])])))) { @@ -293,79 +226,20 @@ process_count_n <- function(x) { summary_stat <- summary_stat %>% # complete all combinations of factors to include combinations that don't exist. # add 0 for combinations that don't exist - complete(!!treat_var, !!!by, !!!target_var, !!!cols, fill = list(n = 0, total = 0)) %>% + complete(!!treat_var, !!!by, !!!target_var, !!!cols, + fill = list(n = 0, total = 0, distinct_n = 0, distinct_total = 0)) %>% # Change the treat_var and first target_var to characters to resolve any # issues if there are total rows and the original column is numeric mutate(!!treat_var := as.character(!!treat_var)) %>% - mutate(!!as_name(target_var[[1]]) := as.character(!!target_var[[1]])) - - # If there is no values in summary_stat, which can happen depending on where. 
Return nothing - if(nrow(summary_stat) == 0) return() - }, envir = x) - -} - -#' Process the distinct n count data and put into summary_stat -#' -#' @param x Count layer -#' @noRd -process_count_distinct_n <- function(x) { - - evalq({ - - # Subset the local built_target based on where - # Catch errors - - - - if(is.null(denoms_by)) denoms_by <- c(treat_var, cols) - - distinct_stat <- built_target %>% - # Filter out based on where - filter(!!where) %>% - mutate( - across( - .cols = any_of(map_chr(c(denoms_by, target_var, by), ~as_name(.))), - .fns = function(x) if(is.factor(x)) x else as.factor(x) - ) - ) %>% - # Distinct based on the current distinct_by, target_var, and treat_var - # treat_var is added because duplicates would be created when there are - # treatment group totals - distinct(!!!distinct_by, !!treat_var, !!!target_var, .keep_all = TRUE) %>% - # Group by variables including target variables and count them - group_by(!!treat_var, !!!by, !!!target_var, !!!cols) %>% - tally(name = "distinct_n") %>% + mutate(!!as_name(target_var[[1]]) := as.character(!!target_var[[1]])) %>% + group_by(!!!denoms_by_) %>% ungroup() - if(!is.null(missing_count_string) && - - !((unname(unlist(missing_count_list)) %in% unique(built_target[, as_name(target_var[[1]])])) || - any(is.na(built_target[, as_name(target_var[[1]])])))) { - # This adds the missing string as a factor to the tallies. This is needed - # to make sure the missing row is added even if there are no missing values. - summary_stat <- summary_stat %>% - mutate(!!target_var[[1]] := fct_expand(as.character(.data[[as_name(target_var[[1]])]]), - names(missing_count_list))) - } - - - # complete all combinations of factors to include combinations that don't exist. - # add 0 for combinations that don't exist - distinct_stat <- distinct_stat %>% - complete(!!treat_var, !!!by, !!!cols, !!!target_var, fill = list(distinct_n = 0, distinct_total = 0)) %>% - # Change the treat_var and first target_var to characters to resolve any - # issues if there are total rows and the original column is numeric - mutate(!!treat_var := as.character(!!treat_var)) %>% - group_by(!!!denoms_by) %>% - do(get_denom_total(., denoms_by, denoms_df, denoms_distinct_df, "distinct_n")) %>% - ungroup() %>% - rename("distinct_total" = "total") - - + rm(denoms_by_) # If there is no values in summary_stat, which can happen depending on where. Return nothing - if(nrow(summary_stat) == 0) return() + if (nrow(summary_stat) == 0) return() }, envir = x) + } #' Process the amounts for a total row @@ -376,7 +250,8 @@ process_count_total_row <- function(x) { evalq({ # Check if denoms_by wasn't passed and by was passed. - if(is.null(denoms_by) & any(map_lgl(by, quo_is_symbol)) > 0) { + if (exists("include_total_row") && include_total_row && + identical(denoms_by, c(treat_var, cols)) && any(map_lgl(by, quo_is_symbol)) > 0) { warning("A total row was added in addition to non-text by variables, but no denoms_by variable was set. This may cause unexpected results. If you wish to change this behavior, use `set_denoms_by()`.", immediate. = TRUE) @@ -394,7 +269,7 @@ change this behavior, use `set_denoms_by()`.", immediate. = TRUE) }, treat_var, cols) #Create an expression to evaluate filter - if(!count_missings){ + if (!count_missings) { filter_logic <- expr(!(!!target_var[[1]] %in% names(missing_count_list))) } else { filter_logic <- expr(TRUE) @@ -407,9 +282,11 @@ change this behavior, use `set_denoms_by()`.", immediate. 
= TRUE) # Use distinct if this is a distinct total row # Group by all column variables group_by(!!treat_var, !!!cols, !!!denoms_by[needed_denoms_by]) %>% - summarize(n = sum(n)) %>% + summarize( + n = sum(n), + distinct_n = sum(distinct_n) + ) %>% ungroup() %>% - mutate(total = n) %>% # Create a variable to label the totals when it is merged in. mutate(!!as_name(target_var[[1]]) := total_row_label) %>% # Create variables to carry forward 'by'. Only pull out the ones that @@ -422,55 +299,6 @@ change this behavior, use `set_denoms_by()`.", immediate. = TRUE) }, envir = x) } -process_count_distinct_total_row <- function(x) { - evalq({ - - # Check if denoms_by wasn't passed and by was passed. - if(is.null(denoms_by) & any(map_lgl(by, quo_is_symbol)) > 0) { - warning("A total row was added in addition to non-text by variables, but -no denoms_by variable was set. This may cause unexpected results. If you wish to -change this behavior, use `set_denoms_by()`.", immediate. = TRUE) - } - - # Make sure the denoms_by is stripped - # Stripped of cols and treat_var variables, otherwise it will error out in the group_by - # I thought of replacing the group by with !!!unique(c(treat_var, cols, denoms_by)) - # but that doesn't work due to the denoms_by having an environment set - - # Logical vector that is used to remove the treat_var and cols - needed_denoms_by <- map_lgl(denoms_by, function(x, treat_var, cols) { - all(as_name(x) != as_name(treat_var), - as_name(x) != map_chr(cols, as_name)) - }, treat_var, cols) - - #Create an expression to evaluate filter - if(!count_missings){ - filter_logic <- expr(!(!!target_var[[1]] %in% names(missing_count_list))) - } else { - filter_logic <- expr(TRUE) - } - - # create a data.frame to create total counts - total_stat_denom <- distinct_stat %>% - #Filter out any ignored denoms - filter(!!filter_logic) %>% - # Group by all column variables - group_by(!!treat_var, !!!cols, !!!denoms_by[needed_denoms_by]) %>% - summarize(distinct_n = sum(distinct_n)) %>% - ungroup() %>% - mutate(distinct_total = distinct_n) %>% - # Create a variable to label the totals when it is merged in. - mutate(!!as_name(target_var[[1]]) := total_row_label) %>% - # Create variables to carry forward 'by'. 
Only pull out the ones that - # aren't symbols - group_by(!!!extract_character_from_quo(by)) %>% - # ungroup right away to make sure the complete works - ungroup() %>% - # complete based on missing groupings - complete(!!treat_var, !!!cols, fill = list(distinct_n = 0, distinct_total = 0)) - }, envir = x) -} - #' Prepare metadata for table #' #' @param x count_layer object @@ -479,7 +307,7 @@ prepare_format_metadata.count_layer <- function(x) { evalq({ # Get formatting metadata prepared - if(is.null(format_strings)) { + if (is.null(format_strings)) { format_strings <- gather_defaults(environment()) } else if (!'n_counts' %in% names(format_strings)) { format_strings[['n_counts']] <- gather_defaults(environment())[['n_counts']] @@ -489,7 +317,7 @@ prepare_format_metadata.count_layer <- function(x) { # If there is both n & distinct, or pct and distinct_pct there has to be a # distinct_by # If both distinct and n - if(((("distinct_n" %in% map(format_strings$n_counts$vars, as_name) & + if (((("distinct_n" %in% map(format_strings$n_counts$vars, as_name) & "n" %in% map(format_strings$n_counts$vars, as_name)) | # or both distinct_pct and pct ("distinct_pct" %in% map(format_strings$n_counts$vars, as_name) & @@ -500,21 +328,21 @@ prepare_format_metadata.count_layer <- function(x) { } # If distinct_by isn't there, change distinct and distinct_pct - if(is.null(distinct_by) & "distinct_n" %in% map(format_strings$n_counts$vars, as_name)) { + if (is.null(distinct_by) & "distinct_n" %in% map(format_strings$n_counts$vars, as_name)) { distinct_ind <- which(map(format_strings$n_counts$vars, as_name) %in% "distinct_n") format_strings$n_counts$vars[[distinct_ind]] <- expr(n) } - if(is.null(distinct_by) & "distinct_pct" %in% map(format_strings$n_counts$vars, as_name)) { + if (is.null(distinct_by) & "distinct_pct" %in% map(format_strings$n_counts$vars, as_name)) { distinct_ind <- which(map(format_strings$n_counts$vars, as_name) %in% "distinct_pct") format_strings$n_counts$vars[[distinct_ind]] <- expr(pct) } # Pull max character length from counts. Should be at least 1 - n_width <- max(c(nchar(numeric_data$n), 1L)) + n_width <- max(c(nchar(numeric_data$n), 1L), na.rm = TRUE) # If a layer_width flag is present, edit the formatting string to display the maximum # character length - if(str_detect(format_strings[['n_counts']]$format_string, "a")) { + if (str_detect(format_strings[['n_counts']]$format_string, "a")) { # Replace the flag with however many xs replaced_string <- str_replace(format_strings[['n_counts']]$format_string, "a", paste(rep("x", n_width), collapse = "")) @@ -536,21 +364,19 @@ process_formatting.count_layer <- function(x, ...) { #used to split the string. 
indentation_length <- ifelse(is.null(indentation), 0, nchar(encodeString(indentation))) - formatted_data <- numeric_data - - # if(is_built_nest && !quo_is_symbol(by[[1]])) { - # names(formatted_data) <- str_remove_all(names(formatted_data), "\\\"") - # } - - - formatted_data <- formatted_data %>% + formatted_data <- numeric_data %>% + filter_numeric(numeric_cutoff, + numeric_cutoff_stat, + numeric_cutoff_column, + treat_var) %>% # Mutate value based on if there is a distinct_by mutate(n = { - construct_count_string(.n=n, .total=total, - .distinct_n=distinct_n, .distinct_total=distinct_total, - count_fmt=format_strings[['n_counts']], - max_layer_length=max_layer_length, - max_n_width=max_n_width, + construct_count_string(.n = n, .total = total, + .distinct_n = distinct_n, + .distinct_total = distinct_total, + count_fmt = format_strings[['n_counts']], + max_layer_length = max_layer_length, + max_n_width = max_n_width, missing_string = missing_string, missing_f_str = missing_count_string, summary_var = summary_var, @@ -566,8 +392,7 @@ process_formatting.count_layer <- function(x, ...) { # Replace the by variables and target variable names with `row_label` replace_by_string_names(quos(!!!by, summary_var)) - if(is_built_nest) { - + if (is_built_nest) { # I had trouble doing this in a 'tidy' way so I just did it here. # First column is always the outer target variable. # Last row label is always the inner target variable @@ -581,7 +406,7 @@ process_formatting.count_layer <- function(x, ...) { if (!is_empty(stats)) { # Process the statistical data formatting formatted_stats_data <- map(stats, process_statistic_formatting) %>% - reduce(full_join, by=c('summary_var', match_exact(c(by, head(target_var, -1))))) %>% + reduce(full_join, by = c('summary_var', match_exact(c(by, head(target_var, -1))))) %>% # Replace the by variables and target variable names with `row_label` replace_by_string_names(quos(!!!by, summary_var)) @@ -589,9 +414,8 @@ process_formatting.count_layer <- function(x, ...) 
{ by = vars_select(names(formatted_data), starts_with("row_label"))) } - - - + # Attach the row identifier + formatted_data <- assign_row_id(formatted_data, 'c') }, envir = x) add_order_columns(x) @@ -634,7 +458,7 @@ construct_count_string <- function(.n, .total, .distinct_n = NULL, .distinct_tot total_rows <- FALSE # Add in the missing format if its null and there are missing counts - if(has_missing_count && is.null(missing_f_str)) { + if (has_missing_count && is.null(missing_f_str)) { missing_f_str <- count_fmt } @@ -650,7 +474,7 @@ construct_count_string <- function(.n, .total, .distinct_n = NULL, .distinct_tot } ## Pull out string information for total rows - if(!is.null(total_count_format)) { + if (!is.null(total_count_format)) { total_rows <- summary_var %in% total_row_label total_vars_ord <- map_chr(total_count_format$vars, as_name) } @@ -662,8 +486,8 @@ construct_count_string <- function(.n, .total, .distinct_n = NULL, .distinct_tot # Append the repl_str to be passed to do.call str_all[1] <- count_fmt$repl_str # Iterate over every variable - for(i in seq_along(vars_ord)) { - str_all[[i+1]] <- count_string_switch_help(vars_ord[i], count_fmt, .n[!missing_rows & !total_rows], .total[!missing_rows & !total_rows], + for (i in seq_along(vars_ord)) { + str_all[[i + 1]] <- count_string_switch_help(vars_ord[i], count_fmt, .n[!missing_rows & !total_rows], .total[!missing_rows & !total_rows], .distinct_n[!missing_rows & !total_rows], .distinct_total[!missing_rows & !total_rows], vars_ord) } @@ -672,23 +496,33 @@ construct_count_string <- function(.n, .total, .distinct_n = NULL, .distinct_tot # Same logic as above, just add for missing missing_str_all <- vector("list", 5) missing_str_all[1] <- missing_f_str$repl_str - for(i in seq_along(missing_vars_ord)) { - missing_str_all[[i+1]] <- count_string_switch_help(missing_vars_ord[i], missing_f_str, .n[missing_rows], .total[missing_rows], - .distinct_n[missing_rows], .distinct_total[missing_rows], missing_vars_ord) + for (i in seq_along(missing_vars_ord)) { + missing_str_all[[i + 1]] <- count_string_switch_help(missing_vars_ord[i], + missing_f_str, + .n[missing_rows], + .total[missing_rows], + .distinct_n[missing_rows], + .distinct_total[missing_rows], + missing_vars_ord) } total_str_all <- vector("list", 5) total_str_all[1] <- total_count_format$repl_str - for(i in seq_along(total_vars_ord)) { - total_str_all[[i+1]] <- count_string_switch_help(total_vars_ord[i], total_count_format, .n[total_rows], .total[total_rows], - .distinct_n[total_rows], .distinct_total[total_rows], total_vars_ord) + for (i in seq_along(total_vars_ord)) { + total_str_all[[i + 1]] <- count_string_switch_help(total_vars_ord[i], + total_count_format, + .n[total_rows], + .total[total_rows], + .distinct_n[total_rows], + .distinct_total[total_rows], + total_vars_ord) } # Put the vector strings together. Only include parts of str_all that aren't null # nm is non-missing, m is mising, and t is total. string_nm <- do.call(sprintf, str_all[!map_lgl(str_all, is.null)]) - if(!is.null(missing_vars_ord)) string_m <- do.call(sprintf, missing_str_all[!map_lgl(missing_str_all, is.null)]) - if(!is.null(total_vars_ord)) string_t <- do.call(sprintf, total_str_all[!map_lgl(total_str_all, is.null)]) + if (!is.null(missing_vars_ord)) string_m <- do.call(sprintf, missing_str_all[!map_lgl(missing_str_all, is.null)]) + if (!is.null(total_vars_ord)) string_t <- do.call(sprintf, total_str_all[!map_lgl(total_str_all, is.null)]) # string_ is the final string to return. 
Merge the missing, non-missing, and others together string_ <- character(length(string_nm) + length(string_m) + length(string_t)) string_[!missing_rows & !total_rows] <- string_nm @@ -732,6 +566,12 @@ count_string_switch_help <- function(x, count_fmt, .n, .total, pcts <- replace(.distinct_n/.distinct_total, is.na(.distinct_n/.distinct_total), 0) map_chr(pcts*100, num_fmt, which(vars_ord == "distinct_pct"), fmt = count_fmt) + }, + "total" = { + map_chr(.total, num_fmt, which(vars_ord == "total"), fmt = count_fmt) + }, + "distinct_total" = { + map_chr(.distinct_total, num_fmt, which(vars_ord == "distinct_total"), fmt = count_fmt) } ) @@ -780,18 +620,18 @@ process_count_denoms <- function(x) { }) # Raise errors if a denom is ignored but there isn't a missing count string - if(!is.null(denom_ignore) && is.null(missing_count_string)) { + if (!is.null(denom_ignore) && is.null(missing_count_string)) { abort("A value(s) were set with 'denom_ignore' but no missing count was set. Your percentages/totals may not have meaning.") } # Logic to determine how to subset target for denominators - if(is.null(denom_where)) { + if (is.null(denom_where)) { denom_where <- where } # Because the missing strings haven't replaced the missing strings, it has to happen here. # Expand denoms contains the - if(!is.null(missing_count_list)) { + if (!is.null(missing_count_list)) { expand_denoms <- names(missing_count_list) %in% unlist(denom_ignore) denom_ignore <- c(denom_ignore, unname(missing_count_list[expand_denoms])) } @@ -809,37 +649,41 @@ process_count_denoms <- function(x) { "` is invalid. Filter error:\n", e)) }) - - if(!is.null(distinct_by)) { # For distinct counts, we want to defer back to the # population dataset. Trigger this by identifying that - # the population dataset was overridden - if (!isTRUE(try(identical(pop_data, target)))) { - if(deparse(denom_where) != deparse(where)){ - warning(paste0("A `denom_where` has been set with a pop_data. The `denom_where` has been ignored.", - "You should use `set_pop_where` instead of `set_denom_where`.", sep = "\n"), - immediate. = TRUE) - } - denoms_distinct_df <- built_pop_data %>% - rename(!!treat_var := !!pop_treat_var) - } else { - denoms_distinct_df <- denom_target + # the population dataset was overridden + if (!isTRUE(try(identical(pop_data, target)))) { + if (deparse(denom_where) != deparse(where)) { + warning(paste0("A `denom_where` has been set with a pop_data. The `denom_where` has been ignored.", + "You should use `set_pop_where` instead of `set_denom_where`.", sep = "\n"), + immediate. 
= TRUE) } - denoms_distinct_df <- denoms_distinct_df %>% - distinct(!!!distinct_by, !!treat_var, .keep_all = TRUE) %>% - group_by(!!!cols, !!treat_var) %>% - summarize(distinct_n = n()) %>% - ungroup() %>% - complete(!!!cols, !!treat_var, fill = list(distinct_n = 0)) } - denoms_df <- denom_target %>% + denoms_df_n <- denom_target %>% group_by(!!!layer_params[param_apears]) %>% - summarize(n = n()) %>% - ungroup() %>% - complete(!!!layer_params[param_apears], fill = list(n = 0)) + summarize( + n = n() + ) %>% + ungroup() + + denoms_df_dist <- built_pop_data %>% + filter(!!denom_where) %>% + group_by(!!pop_treat_var) %>% + summarize( + distinct_n = n_distinct(!!!distinct_by, !!pop_treat_var) + ) %>% + ungroup() + + by_join <- as_name(pop_treat_var) + names(by_join) <- as_name(treat_var) + + denoms_df <- denoms_df_n %>% + complete(!!!layer_params[param_apears], + fill = list(n = 0)) %>% + left_join(denoms_df_dist, by = by_join) - if(as_name(target_var[[1]]) %in% names(target)) { + if (as_name(target_var[[1]]) %in% names(target)) { denoms_df <- denoms_df %>% rename("summary_var" := !!target_var[[1]]) } @@ -851,19 +695,19 @@ process_count_denoms <- function(x) { rename_missing_values <- function(x) { evalq({ # Rename missing values - if(!is.null(missing_count_list)){ + if (!is.null(missing_count_list)) { missing_count_list_ <- missing_count_list # If the target variable isn't a character or a factor. Coerse it as a # character. This can happen if the target var is numeric - if(!(class(built_target[, as_name(target_var[[1]])][[1]]) %in% c("factor", "character"))) { + if (!(class(built_target[, as_name(target_var[[1]])][[1]]) %in% c("factor", "character"))) { built_target <- built_target %>% mutate(!!target_var[[1]] := as.character(!!target_var[[1]])) } # Collapse the factors that were missing. - for(i in seq_along(missing_count_list)) { + for (i in seq_along(missing_count_list)) { # Logic if the missing_count_list contains an implicit NA - if(any(is.nan(missing_count_list[[i]]))){ + if (any(is.nan(missing_count_list[[i]]))) { ## Repalce the NA in the missing_count list with an explicit value missing_count_list_[[i]] <- ifelse(missing_count_list[[i]] == "NaN", "(Missing_NAN)", as.character(missing_count_list[[i]])) # Replace the implicit values in built_target @@ -871,7 +715,7 @@ rename_missing_values <- function(x) { mutate(!!target_var[[1]] := fct_expand(!!target_var[[1]], "(Missing_NAN)")) %>% mutate(!!target_var[[1]] := ifelse(is.nan(!!target_var[[1]]), "(Missing_NAN)", as.character(!!target_var[[1]]))) - } else if(any(is.na(missing_count_list[[i]]))){ + } else if (any(is.na(missing_count_list[[i]]))) { ## Repalce the NA in the missing_count list with an explicit value missing_count_list_[[i]] <- ifelse(is.na(as.character(missing_count_list[[i]])) , "(Missing)", as.character(missing_count_list[[i]])) # Replace the implicit values in built_target @@ -890,3 +734,30 @@ rename_missing_values <- function(x) { } }, envir = x) } + +filter_numeric <- function(.data, + numeric_cutoff, + numeric_cutoff_stat, + numeric_cutoff_column, + treat_var, + by = NULL) { + + if (is.null(numeric_cutoff)) { + return(.data) + } + + vals <- .data %>% + {if (is.null(numeric_cutoff_column)) . 
else filter(., !!treat_var == numeric_cutoff_column)} %>% + mutate( + pct = n/total, + distinct_pct = distinct_n/distinct_total + ) %>% + filter(!!sym(numeric_cutoff_stat) >= !!numeric_cutoff) %>% + extract2("summary_var") + + .data %>% + filter(summary_var %in% vals) + + +} + diff --git a/R/count_bindings.R b/R/count_bindings.R index 3dcb8e49..98cecd75 100644 --- a/R/count_bindings.R +++ b/R/count_bindings.R @@ -90,19 +90,21 @@ set_total_row_label <- function(e, total_row_label) { #' #' In some situations, count summaries may want to see distinct counts by a #' variable like subject. For example, the number of subjects in a population -#' who had a particular adverse event. \code{set_distinct_by} allows you to set +#' who had a particular adverse event. `set_distinct_by` allows you to set #' the by variables used to determine a distinct count. #' -#' When a \code{distinct_by} value is set, distinct counts will be used by +#' When a `distinct_by` value is set, distinct counts will be used by #' default. If you wish to combine distinct and not distinct counts, you can -#' choose which to display in your \code{\link{f_str}} objects using \code{n}, -#' \code{pct}, \code{distinct}, and \code{distinct_pct}. +#' choose which to display in your `f_str()` objects using `n`, +#' `pct`, `distinct_n`, and `distinct_pct`. Additionally, denominators +#' may be presented using `total` and `distinct_total`. -#' -#' @param e A \code{count_layer/shift_layer} object +#' @param e A `count_layer/shift_layer` object #' @param distinct_by Variable(s) to get the distinct data. #' #' @return The layer object with `distinct_by` set #' @export +#' @md #' #' @examples #' #Load in pipe @@ -582,6 +584,8 @@ set_denom_where <- function(e, denom_where) { e } +#' @export +#' @noRd set_denoms_by.count_layer <- function(e, ...) { dots <- vars(...) dots_chr <- map_chr(dots, as_name) @@ -596,13 +600,13 @@ set_denoms_by.count_layer <- function(e, ...) { assert_that(all(dots_chr %in% c(by_, cols_, treat_var_, target_var_)), msg = "A denom_by wasn't found as a grouping variable in the layer/table.") - if(length(target_var) == 2) { + if (length(target_var) == 2) { assert_that(!(as_name(target_var[[2]]) %in% dots_chr), msg = "You can not pass the second variable in `vars` as a denominator.") } # If the row variable is here, rename it to summary_var - if(as_name(target_var[[1]]) %in% dots_chr) { + if (as_name(target_var[[1]]) %in% dots_chr) { dots[dots_chr %in% as_name(target_var[[1]])] <- quos(summary_var) } @@ -652,3 +656,50 @@ keep_levels <- function(e, ...) { e } +#' Set a numeric cutoff +#' +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' In certain tables, it may be necessary to only include rows that meet numeric +#' conditions. Rows that are less than a certain cutoff can be suppressed from +#' the output. This function allows you to pass a cutoff and a cutoff stat (n, +#' distinct_n, pct, or distinct_pct) to suppress values that are less than the +#' cutoff. +#' +#' +#' +#' @param e A \code{count_layer} object +#' @param numeric_cutoff A numeric value; only values greater than or equal +#' to this cutoff will be displayed. +#' @param stat The statistic to use when filtering out rows. One of 'n', +#' 'distinct_n', or 'pct' is allowable +#' @param column If only a particular column should be used to cut off values, it
+#' +#' @return The modified Tplyr layer object +#' @export +#' @md +#' +#' @examples +#' mtcars %>% +#' tplyr_table(gear) %>% +#' add_layer( +#' group_count(cyl) %>% +#' set_numeric_threshold(10, "n") %>% +#' add_total_row() %>% +#' set_order_count_method("bycount") +#' ) +set_numeric_threshold <- function(e, numeric_cutoff, stat, column = NULL) { + + assert_that(is.numeric(numeric_cutoff), + msg = "The `numeric_cutoff` parameter must be numeric") + assert_that(stat %in% c("n", "distinct_n", "pct"), + msg = "Allowed values for 'stat' are 'n', 'distinct_n', or 'pct'") + + env_bind(e, numeric_cutoff = numeric_cutoff) + env_bind(e, numeric_cutoff_stat = stat) + env_bind(e, numeric_cutoff_column = column) + + e +} diff --git a/R/denom.R b/R/denom.R index 4a2161b5..4598d260 100644 --- a/R/denom.R +++ b/R/denom.R @@ -148,34 +148,25 @@ get_header_n_value.data.frame <- function(x, ...) { #' #' @return A data.frame with the #' @noRd -get_denom_total <- function(.data, denoms_by, denoms_df, denoms_distinct_df, total_extract = "n") { - - if(total_extract == "n") { - # Filter denoms dataset - vars_in_denoms <- denoms_by[map_lgl(denoms_by, ~ as_name(.) %in% names(denoms_df))] - filter_logic <- map(vars_in_denoms, function(x) { - expr(!!sym(as_name(x)) == !!unique(.data[, as_name(x)])[[1]]) - }) - - sums <- denoms_df %>% - filter(!!!filter_logic) %>% - group_by(!!!vars_in_denoms) %>% - extract("n") - - } else { - # Filter denoms dataset - vars_in_denoms <- denoms_by[map_lgl(denoms_by, ~ as_name(.) %in% names(denoms_distinct_df))] - filter_logic <- map(vars_in_denoms, function(x) { - expr(!!sym(as_name(x)) == !!unique(.data[, as_name(x)])[[1]]) - }) - - sums <- denoms_distinct_df %>% - filter(!!!filter_logic) %>% - group_by(!!!vars_in_denoms) %>% - extract("distinct_n") - } +get_denom_total <- function(.data, denoms_by, denoms_df, + total_extract = "n") { + + # Filter denoms dataset + vars_in_denoms <- denoms_by[map_lgl(denoms_by, ~ as_name(.) %in% names(denoms_df))] + filter_logic <- map(vars_in_denoms, function(x) { + expr(!!sym(as_name(x)) == !!unique(.data[, as_name(x)])[[1]]) + }) - .data$total <- ifelse(nrow(sums) > 0, sum(sums, na.rm = TRUE), 0) + sums <- denoms_df %>% + filter(!!!filter_logic) %>% + group_by(!!!vars_in_denoms) + + .data$total <- ifelse(nrow(sums) > 0, sum(sums[["n"]], na.rm = TRUE), 0) + # distinct_n is present for all count layers, but not shift layers, so + # dont' do this for shift layers + if ("distinct_n" %in% names(sums)) + .data$distinct_total <- ifelse(nrow(sums) > 0, sums[["distinct_n"]], 0) .data + } diff --git a/R/desc.R b/R/desc.R index 515c047f..c6ad4b6c 100644 --- a/R/desc.R +++ b/R/desc.R @@ -19,7 +19,10 @@ process_summaries.desc_layer <- function(x, ...) { evalq({ # trans_sums is the data that will pass forward to be formatted trans_sums <- vector("list", length(target_var)) - # num_sums is the data that will be bound together and returned to provide the numeric internal values + # num_sums is the data that will be bound together and returned to provide + # the numeric internal values + # num_sums_raw is kept separate to better facililate use for prep of metadata + num_sums_raw <- vector("list", length(target_var)) num_sums <- vector("list", length(target_var)) # Get the row labels out from the format strings list @@ -46,7 +49,7 @@ process_summaries.desc_layer <- function(x, ...) 
{ summaries <- get_summaries()[match_exact(summary_vars)] # Create the numeric summary data - num_sums[[i]] <- built_target %>% + num_sums_raw[[i]] <- built_target %>% # Rename the current variable to make each iteration use a generic name rename(.var = !!cur_var) %>% # Group by treatment, provided by variable, and provided column variables @@ -58,7 +61,7 @@ process_summaries.desc_layer <- function(x, ...) { complete(!!treat_var, !!!by, !!!cols) # Create the transposed summary data to prepare for formatting - trans_sums[[i]] <- num_sums[[i]] %>% + trans_sums[[i]] <- num_sums_raw[[i]] %>% # Transpose the summaries that make up the first number in a display string # into the the `value` column with labels by `stat` pivot_longer(cols = match_exact(trans_vars), names_to = "stat") %>% @@ -77,7 +80,7 @@ process_summaries.desc_layer <- function(x, ...) { } # Numeric data needs the variable names replaced and add summary variable name - num_sums[[i]] <- replace_by_string_names(num_sums[[i]], by) %>% + num_sums[[i]] <- replace_by_string_names(num_sums_raw[[i]], by) %>% mutate(summary_var = as_name(cur_var)) %>% select(summary_var, everything()) @@ -88,9 +91,6 @@ process_summaries.desc_layer <- function(x, ...) { # Bind the numeric data together within the layer numeric_data <- pivot_longer(bind_rows(num_sums), cols = match_exact(summary_vars), names_to = "stat") - # Delete the listed numeric data - rm(num_sums) - }, envir=x) } @@ -109,8 +109,13 @@ process_formatting.desc_layer <- function(x, ...) { form_sums <- vector("list", length(target_var)) if (need_prec_table) { - # If the precision table is required, create it - prec <- make_prec_data(built_target, precision_by, precision_on, cap) + if ('prec' %in% ls()) { + # If precision data was manually specified, grab it + prec <- get_prec_data(built_target, prec, precision_by, precision_on, cap, prec_error) + } else { + # Otherwise create it + prec <- make_prec_data(built_target, precision_by, precision_on, cap) + } } for (i in seq_along(trans_sums)) { @@ -128,35 +133,54 @@ process_formatting.desc_layer <- function(x, ...) { .fmt_str = format_strings), format_strings=format_strings) - # String pad each of the display strings to match the longest value across layers - # TODO: Introduce auto-padding after alhpa release - # trans_sums[[i]] <- trans_sums[[i]] %>% - # mutate(display_string = str_pad(display_string, max_layer_length, side='right')) - # Now do one more transpose to split the columns out # Default is to use the treatment variable, but if `cols` was provided # then also transpose by cols. 
- form_sums[[i]] <- trans_sums[[i]] %>% - pivot_wider(id_cols=c('row_label', match_exact(by)), # Keep row_label and the by variables - names_from = match_exact(vars(!!treat_var, !!!cols)), # Pull the names from treatment and cols argument - names_prefix = paste0('var', i, "_"), # Prefix with the name of the target variable - values_from = display_string # Use the created display_string variable for values + if (stats_as_columns) { + form_sums[[i]] <- trans_sums[[i]] %>% + pivot_wider(id_cols=c(!!treat_var, match_exact(by)), # Keep row_label and the by variables + names_from = match_exact(vars(row_label, !!!cols)), # Pull the names from treatment and cols argument + names_prefix = paste0('var', i, "_"), # Prefix with the name of the target variable + values_from = display_string # Use the created display_string variable for values + ) + + } else { + form_sums[[i]] <- trans_sums[[i]] %>% + pivot_wider(id_cols=c('row_label', match_exact(by)), # Keep row_label and the by variables + names_from = match_exact(vars(!!treat_var, !!!cols)), # Pull the names from treatment and cols argument + names_prefix = paste0('var', i, "_"), # Prefix with the name of the target variable + values_from = display_string # Use the created display_string variable for values ) + + } } # Join the final outputs - formatted_data <- reduce(form_sums, full_join, by=c('row_label', match_exact(by))) %>% - rowwise() %>% - # Replace NA values with the proper empty strings - mutate_at(vars(starts_with('var')), ~ replace_na(.x, format_strings[[row_label]]$empty)) + if (stats_as_columns) { + formatted_data <- reduce(form_sums, full_join, by=c(as_label(treat_var), match_exact(by))) + + # Replace row label names + formatted_data <- replace_by_string_names(formatted_data, by, treat_var) + } else { + formatted_data <- reduce(form_sums, full_join, by=c('row_label', match_exact(by))) + + # Replace row label names + formatted_data <- replace_by_string_names(formatted_data, by) + } + + + # Don't want to delete this until I'm absolutely sure it's not necessary + # formatted_data <- formatted_data %>% + # rowwise() %>% + # # Replace NA values with the proper empty strings + # mutate_at(vars(starts_with('var')), ~ replace_na(.x, format_strings[[row_label]]$empty)) - # Replace row label names - formatted_data <- replace_by_string_names(formatted_data, by) # Clean up - rm(trans_sums, form_sums, i) + rm(form_sums, i) + + formatted_data <- assign_row_id(formatted_data, 'd') - formatted_data }, envir=x) add_order_columns(x) @@ -219,7 +243,6 @@ construct_desc_string <- function(..., .fmt_str=NULL) { } else { autos <- c('int'=0, 'dec'=0) } - # Format the transposed value fmt_args <- list(fmt = fmt$repl_str, num_fmt(value, 1, fmt, autos)) diff --git a/R/desc_bindings.R b/R/desc_bindings.R index 820e2a7b..5c31d348 100644 --- a/R/desc_bindings.R +++ b/R/desc_bindings.R @@ -99,3 +99,48 @@ set_custom_summaries <- function(e, ...){ env_bind(e, custom_summaries = params) e } + +#' Set descriptive statistics as columns +#' +#' In many cases, treatment groups are represented as columns within a table. +#' But some tables call for a transposed presentation, where the treatment +#' groups displayed by row, and the descriptive statistics are represented as +#' columns. \code{set_stats_as_columns()} allows Tplyr to output a built table +#' using this transposed format and deviate away from the standard +#' representation of treatment groups as columns. +#' +#' This function leaves all specified by variables intact. 
The only switch that +#' happens during the build process is that the provided descriptive statistics +#' are transposed as columns and the treatment variable is left as rows. Column +#' variables will remain represented as columns, and multiple target variables +#' will also be respected properly. +#' +#' @param e \code{desc_layer} on descriptive statistics summaries should be represented as columns +#' @param stats_as_columns Boolean to set stats as columns +#' +#' @return The input tplyr_layer +#' @export +#' +#' @examples +#' +#' dat <- tplyr_table(mtcars, gear) %>% +#' add_layer( +#' group_desc(wt, by = vs) %>% +#' set_format_strings( +#' "n" = f_str("xx", n), +#' "sd" = f_str("xx.x", sd, empty = c(.overall = "BLAH")), +#' "Median" = f_str("xx.x", median), +#' "Q1, Q3" = f_str("xx, xx", q1, q3), +#' "Min, Max" = f_str("xx, xx", min, max), +#' "Missing" = f_str("xx", missing) +#' ) %>% +#' set_stats_as_columns() +#' ) %>% +#' build() +#' +set_stats_as_columns <- function(e, stats_as_columns=TRUE) { + assert_inherits_class(e, 'desc_layer') + assert_has_class(stats_as_columns, 'logical') + env_bind(e, stats_as_columns = stats_as_columns) + e +} diff --git a/R/format.R b/R/format.R index 6781ef10..f612521b 100644 --- a/R/format.R +++ b/R/format.R @@ -1,13 +1,13 @@ ### Formatting -#' Create a \code{f_str} object +#' Create a `f_str` object #' -#' \code{f_str} objects are intended to be used within the function -#' \code{set_format_strings}. The \code{f_str} object carries information that -#' powers a significant amount of layer processing. The \code{format_string} -#' parameter is capable of controlling the display of a data point and decimal -#' precision. The variables provided in \code{...} control which data points are -#' used to populate the string formatted output. +#' `f_str` objects are intended to be used within the function +#' `set_format_strings`. The `f_str` object carries information that powers a +#' significant amount of layer processing. The `format_string` parameter is +#' capable of controlling the display of a data point and decimal precision. The +#' variables provided in `...` control which data points are used to populate +#' the string formatted output. #' #' @details Format strings are one of the most powerful components of 'Tplyr'. #' Traditionally, converting numeric values into strings for presentation can @@ -19,12 +19,12 @@ #' #' Tplyr provides both manual and automatic decimal precision formatting. The #' display of the numbers in the resulting data frame is controlled by the -#' \code{format_string} parameter. For manual precision, just like dummy -#' values may be presented on your mocks, integer and decimal precision is -#' specified by the user providing a string of 'x's for how you'd like your -#' numbers formatted. If you'd like 2 integers with 3 decimal places, you -#' specify your string as 'xx.xxx'. 'Tplyr' does the work to get the numbers -#' in the right place. +#' `format_string` parameter. For manual precision, just like dummy values may +#' be presented on your mocks, integer and decimal precision is specified by +#' the user providing a string of 'x's for how you'd like your numbers +#' formatted. If you'd like 2 integers with 3 decimal places, you specify your +#' string as 'xx.xxx'. 'Tplyr' does the work to get the numbers in the right +#' place. #' #' To take this a step further, automatic decimal precision can also be #' obtained based on the collected precision within the data. When creating @@ -44,20 +44,50 @@ #' for the two values. 
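# Hedged illustration only: two f_str() objects matching the description
# above - one with fully manual precision, one using the auto-precision
# notation ('a', 'a+1') used elsewhere in this changeset. Printing an f_str
# shows how the format string was parsed.
library(Tplyr)

manual_fmt <- f_str("xx.xx (xx.xxx)", mean, sd)
auto_fmt   <- f_str("a.a (a.a+1)", mean, sd)

manual_fmt
auto_fmt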
Each format string is independent and relates only to #' the format specified. #' -#' The other parameters of the \code{f_str} call specify what values should -#' fill the x's. \code{f_str} objects are used slightly differently between -#' different layers. When declaring a format string within a count layer, -#' \code{f_str} expects to see the values \code{n} and (if desired) -#' \code{pct}, which specifies the formatting for your n's and percent values. -#' But in descriptive statistic layers, \code{f_str} parameters refer to the -#' names of the summaries being performed, either by built in defaults, or -#' custom summaries declared using \code{\link{set_custom_summaries}}. See -#' \code{\link{set_format_strings}} for some more notes about layers specific -#' implementation. -#' -#' Count and shift layers frequencies and percentages can be specified with -#' 'n' and 'pct' respectively. Distinct values can also be presented in count -#' layers with the arguments 'distinct' and 'distinct_total'. +#' The other parameters of the `f_str` call specify what values should fill +#' the x's. `f_str` objects are used slightly differently between different +#' layers. When declaring a format string within a count layer, `f_str()` +#' expects to see the values `n` or `distinct_n` for event or distinct counts, +#' `pct` or `distinct_pct` for event or distinct percentages, or `total` or +#' `distinct_total` for denominator calculations. But in descriptive statistic +#' layers, `f_str` parameters refer to the names of the summaries being +#' performed, either by built in defaults, or custom summaries declared using +#' [set_custom_summaries()]. See [set_format_strings()] for some more notes +#' about layers specific implementation. +#' +#' An `f_str()` may also be used outside of a Tplyr table. The function +#' [apply_formats()] allows you to apply an `f_str` within the context of +#' [dplyr::mutate()] or more generally a vectorized function. +#' +#' @section Valid `f_str()` Variables by Layer Type: +#' +#' Valid variables allowed within the `...` parameter of `f_str()` differ by +#' layer type. +#' +#' - Count layers +#' - `n` +#' - `pct` +#' - `total` +#' - `distinct_n` +#' - `distinct_pct` +#' - `distinct_total` +#' - Shift layers +#' - `n` +#' - `pct` +#' - `total` +#' - Desc layers +#' - `n` +#' - `mean` +#' - `sd` +#' - `median` +#' - `variance` +#' - `min` +#' - `max` +#' - `iqr` +#' - `q1` +#' - `q3` +#' - `missing` +#' - Custom summaries created by [set_custom_summaries()] #' #' @param format_string The desired display format. X's indicate digits. On the #' left, the number of x's indicates the integer length. On the right, the @@ -71,8 +101,9 @@ #' single element character vector, with the element named '.overall' to #' instead replace the whole string. #' -#' @return A \code{f_str} object +#' @return A `f_str` object #' @export +#' @md #' #' @examples #' @@ -210,7 +241,7 @@ parse_fmt <- function(x) { #' can, while still allowing flexibility to the user. #' #' In a count layer, you can simply provide a single \code{\link{f_str}} -#' object to specify how you want your n's (and possibly percents) formatted. +#' object to specify how you want your n's, percentages, and denominators formatted. #' If you are additionally supplying a statistic, like risk difference using #' \code{\link{add_risk_diff}}, you specify the count formats using the name #' 'n_counts'. The risk difference formats would then be specified using the @@ -297,7 +328,7 @@ set_format_strings <- function(e, ...) 
{ #' @param cap A named character vector containing an 'int' element for the cap #' on integer precision, and a 'dec' element for the cap on decimal precision. #' -#' @return +#' @return tplyr_layer object with formats attached #' @export #' #' @rdname set_format_strings @@ -348,6 +379,10 @@ set_format_strings.desc_layer <- function(e, ..., cap=getOption('tplyr.precision # Identify if auto precision is needed need_prec_table <- any(map_lgl(format_strings, ~ .x$auto_precision)) + # Fill in defaults if cap hasn't fully been provided + if (!('int' %in% names(cap))) cap['int'] <- getOption('tplyr.precision_cap')['int'] + if (!('dec' %in% names(cap))) cap['dec'] <- getOption('tplyr.precision_cap')['dec'] + env_bind(e, format_strings = format_strings, summary_vars = vars(!!!summary_vars), @@ -564,8 +599,8 @@ count_f_str_check <- function(...) { for (name in names(params)) { if (name == "n_counts") { - assert_that(all(params[['n_counts']]$vars %in% c("n", "pct", "distinct", "distinct_n", "distinct_pct")), - msg = "f_str for n_counts in a count_layer can only be n, pct, distinct, or distinct_pct") + assert_that(all(params[['n_counts']]$vars %in% c("n", "pct", "distinct", "distinct_n", "distinct_pct", "total", "distinct_total")), + msg = "f_str for n_counts in a count_layer can only be n, pct, distinct, distinct_pct, total, or distinct_total") # Check to make sure both disintct(old), and distinct_n(new) aren't passed assert_that(!all(c("distinct", "distinct_n") %in% params[["n_counts"]]$vars), @@ -576,7 +611,7 @@ count_f_str_check <- function(...) { msg = "You've passed duplicate parameters to `set_format_strings`") # Replace the disinct with distinct_n - if(any(params[["n_counts"]]$vars %in% "distinct")) { + if (any(params[["n_counts"]]$vars %in% "distinct")) { warning("The use of 'distinct' in count f_strs is discouraged. It was replaced with 'distinct_n' for consistancy.") } params[["n_counts"]]$vars[params[["n_counts"]]$vars %in% "distinct"] <- "distinct_n" diff --git a/R/layer_bindings.R b/R/layer_bindings.R index e5c305af..35d711f6 100644 --- a/R/layer_bindings.R +++ b/R/layer_bindings.R @@ -177,3 +177,75 @@ set_precision_on <- function(layer, precision_on) { layer } + +#' Set precision data +#' +#' In some cases, there may be organizational standards surrounding decimal precision. +#' For example, there may be a specific standard around the representation of precision relating +#' to lab results. As such, `set_precision_data()` provides an interface to provide integer and +#' decimal precision from an external data source. +#' +#' The ultimate behavior of this feature is just that of the existing auto precision method, except +#' that the precision is specified in the provided precision dataset rather than inferred from the source data. +#' At a minimum, the precision dataset must contain the integer variables `max_int` and `max_dec`. If by variables +#' are provided, those variables must be available in the layer by variables. +#' +#' When the table is built, by default Tplyr will error if the precision dataset is missing by variable groupings +#' that exist in the target dataset. This can be overriden using the `default` parameter. If `default` is set to +#' "auto", any missing values will be automatically inferred from the source data. +#' +#' @param layer A \code{tplyr_layer} object +#' @param prec A dataframe following the structure specified in the function details +#' @param default Handling of unspecified by variable groupings. Defaults to 'error'. 
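# Hedged sketch of the cap default fill-in added above: a partially specified
# cap (only 'dec' here) is completed from the tplyr.precision_cap option
# instead of failing. The data and format choices are arbitrary and assume
# the Tplyr version introduced by this changeset.
library(dplyr)
library(Tplyr)

tplyr_table(mtcars, gear) %>%
  add_layer(
    group_desc(wt) %>%
      set_format_strings(
        "Mean (SD)" = f_str("a.a (a.a+1)", mean, sd),
        cap = c(dec = 2)
      )
  ) %>%
  build()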
Set to 'auto' to automatically infer any missing groups. +#' +#' @md +#' @export +#' +#' @examples +#' +#' prec <- tibble::tribble( +#' ~vs, ~max_int, ~max_dec, +#' 0, 1, 1, +#' 1, 2, 2 +#' ) +#' +#' tplyr_table(mtcars, gear) %>% +#' add_layer( +#' group_desc(wt, by = vs) %>% +#' set_format_strings( +#' 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd) +#' ) %>% +#' set_precision_data(prec) %>% +#' set_precision_on(wt) +#' ) %>% +#' build() +#' +set_precision_data <- function(layer, prec, default = c("error", "auto")) { + + default <- match.arg(default) + + # Grab the metadata + precision_by <- names(prec)[which(!names(prec) %in% c('max_int', 'max_dec'))] + precision_by_syms <- map(precision_by, sym) + + # Insert the by variables in the layer and let set_precision_by validate + set_precision_by(layer, vars(!!!precision_by_syms)) + + # Checks + # max_int and max_dec are both on precision dataset + assert_that( + all(c('max_int', 'max_dec') %in% names(prec)), + msg = "Precision dataset must include the variables max_int and max_dec" + ) + + # max_int and max_dec are all valid integers + assert_that( + sum(c(prec$max_int, prec$max_dec) %% 1) == 0, + msg = "max_int and max_dec in precision dataset must be valid integer values" + ) + + # Bind it to the layer + env_bind(layer, prec = prec) + env_bind(layer, prec_error = default) + layer +} diff --git a/R/layer_templates.R b/R/layer_templates.R new file mode 100644 index 00000000..3de3c246 --- /dev/null +++ b/R/layer_templates.R @@ -0,0 +1,325 @@ +#' Create, view, extract, remove, and use Tplyr layer templates +#' +#' There are several scenarios where a layer template may be useful. Some +#' tables, like demographics tables, may have many layers that will all +#' essentially look the same. Categorical variables will have the same count +#' layer settings, and continuous variables will have the same desc layer +#' settings. A template allows a user to build those settings once per layer, +#' then reference the template when the Tplyr table is actually built. +#' +#' This suite of functions allows a user to create and use layer templates. +#' Layer templates allow a user to pre-build and reuse an entire layer +#' configuration, from the layer constructor down to all modifying functions. +#' Furthermore, users can specify parameters they may want to be +#' interchangeable. Additionally, layer templates are extensible, so a template +#' can be use and then further extended with additional layer modifying +#' functions. +#' +#' Layers are created using `new_layer_template()`. To use a layer, use the +#' function `use_template()` in place of `group_count|desc|shift()`. If you want +#' to view a specific template, use `get_layer_template()`. If you want to view +#' all templates, use `get_layer_templates()`. And to remove a layer template use +#' `remove_layer_template()`. Layer templates themselves are stored in the +#' option `tplyr.layer_templates`, but a user should not access this directly +#' and instead use the Tplyr supplied functions. +#' +#' When providing the template layer syntax, the layer must start with a layer +#' constructor. These are one of the function `group_count()`, `group_desc()`, +#' or `group_shift()`. Instead of passing arguments into these function, +#' templates are specified using an ellipsis in the constructor, i.e. +#' `group_count(...)`. This is required, as after the template is built a user +#' supplies these arguments via `use_template()` +#' +#' `use_template()` takes the `group_count|desc|shift()` arguments by default. 
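# Hedged sketch of the default = "auto" behaviour described above: the
# precision lookup deliberately omits the vs == 1 grouping, so precision for
# that group is inferred from the source data rather than raising an error.
# Dataset and format choices are arbitrary.
library(dplyr)
library(Tplyr)

prec_partial <- tibble::tribble(
  ~vs, ~max_int, ~max_dec,
  0,   1,        1
)

tplyr_table(mtcars, gear) %>%
  add_layer(
    group_desc(wt, by = vs) %>%
      set_format_strings("Mean (SD)" = f_str("a.a+1 (a.a+2)", mean, sd)) %>%
      set_precision_data(prec_partial, default = "auto") %>%
      set_precision_on(wt)
  ) %>%
  build()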
+#' If a user specified additional arguments in the template, these are provided +#' in a list throught the argument `add_params`. Provide these arguments exactly +#' as you would in a normal layer. When creating the template, these parameters +#' can be specified by using curly brackets. See the examples for details. +#' +#' @param name Template name +#' @param template Template layer syntax, starting with a layer constructor +#' `group_count|desc|shift`. This function should be called with an ellipsis +#' argument (i.e. group_count(...)). +#' +#' @md +#' @export +#' +#' @family Layer Templates +#' @rdname layer_templates +#' +#' @examples +#' +#' op <- options() +#' +#' new_layer_template( +#' "example_template", +#' group_count(...) %>% +#' set_format_strings(f_str('xx (xx%)', n, pct)) +#' ) +#' +#' get_layer_templates() +#' +#' get_layer_template("example_template") +#' +#' tplyr_table(mtcars, vs) %>% +#' add_layer( +#' use_template("example_template", gear) +#' ) %>% +#' build() +#' +#' remove_layer_template("example_template") +#' +#' new_layer_template( +#' "example_template", +#' group_count(...) %>% +#' set_format_strings(f_str('xx (xx%)', n, pct)) %>% +#' set_order_count_method({sort_meth}) %>% +#' set_ordering_cols({sort_cols}) +#' ) +#' +#' get_layer_template("example_template") +#' +#' tplyr_table(mtcars, vs) %>% +#' add_layer( +#' use_template("example_template", gear, add_params = +#' list( +#' sort_meth = "bycount", +#' sort_cols = `1` +#' )) +#' ) %>% +#' build() +#' +#' remove_layer_template("example_template") +#' +#' options(op) +new_layer_template <- function(name, template) { + template <- enexpr(template) + + # Have to convert the call to a character and collapse it to order text correctly + raw_template <- paste0(c(template), collapse="\n") + + # Enforce ellipsis on layer constructors + if (!str_detect(raw_template, "group_(count|desc|shift)\\(\\.{3}\\)")) { + msg <- paste0( + "Invalid template - templates must start with an ellipsis (i.e. ...) passed to either ", + "group_count, group_desc, or group_shift. For example, group_count(...)" + ) + stop(msg, call.=FALSE) + } + + # Make sure that the template is valid + modify_nested_call(template, examine_only = TRUE) + + if (name %in% names(getOption("tplyr.layer_templates"))) { + warning( + sprintf("A template by the name %s already exists. Template will be overwritten.", name), + call. = FALSE + ) + remove_layer_template(name) + } + + # Find any add_params provided into the template. + # The indexing here is pulling the first element out of the list, because I + # only pass a single element character vector. 
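# Hedged illustration of the parameter scan above, runnable on its own: the
# regex pulls the names wrapped in curly braces out of the deparsed template
# text, and [[1]][, 2] keeps only the captured group. The template text below
# is a hand-written stand-in with whitespace inside the braces so the \s+ in
# the pattern matches, mirroring how deparsed R code is spaced.
library(stringr)

raw_template_example <- "group_count(...) %>%
  set_order_count_method({ sort_meth }) %>%
  set_ordering_cols({ sort_cols })"

str_match_all(raw_template_example, "\\{\\s+([\\w\\.]+)\\s+\\}")[[1]][, 2]
# Expected under these assumptions: "sort_meth" "sort_cols"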
Next, I use [,2] because I know + # I need column two because I'm specifically requesting the group within the + # parens of the regex + params <- str_match_all(raw_template, "\\{\\s+([\\w\\.]+)\\s+\\}")[[1]][,2] + + # Turn the template into a function, with class to mark as a template + func <-structure( + paste0(c("{",raw_template,"}"), collapse="\n"), + params = params, + template_name = name, + class = c("tplyr_layer_template") + ) + + add_func <- list(func) + names(add_func) <- name + + # Insert the function into the Tplyr namespace + options( + tplyr.layer_templates = append(getOption("tplyr.layer_templates"), add_func) + ) +} + +#' @family Layer Templates +#' @rdname layer_templates +#' @export +remove_layer_template <- function(name) { + tmps <- getOption('tplyr.layer_templates') + + if (name %in% names(tmps)) { + options(tplyr.layer_templates = tmps[names(tmps) != name]) + } else{ + warning(sprintf("No template named %s", name)) + } +} + +#' @family Layer Templates +#' @rdname layer_templates +#' @export +get_layer_template <- function(name) { + tmps <- getOption('tplyr.layer_templates') + if (!(name %in% names(tmps))) { + stop(sprintf("Template %s does not exist", name), call.=FALSE) + } + tmps[[name]] +} + +#' @family Layer Templates +#' @rdname layer_templates +#' @export +get_layer_templates <- function() { + getOption('tplyr.layer_templates') +} + +#' @param ... Arguments passed directly into a layer constructor, matching the +#' target, by, and where parameters. +#' @param add_params Additional parameters passed into layer modifier functions. +#' These arguments are specified in a template within curly brackets such as +#' {param}. Supply as a named list, where the element name is the parameter. +#' +#' @family Layer Templates +#' @rdname layer_templates +#' @export +use_template <- function(name, ..., add_params = NULL) { + + # From the add_params call, pull out the call arguments This allows us to + # capture any names or call as name or calls without quoting non-quoted + # arguments within the list + add_params <- enquo(add_params) + + if (is_call(quo_get_expr(add_params))) { + if (call_name(add_params) != "list") { + stop("Arguments must be passed to `add_params` in a list.", call.=FALSE) + } + add_params_args <- call_args(add_params) + + if (!is_named(add_params_args)) { + stop("Arguments pass in `add_params` must be named", call.=FALSE) + } + } else if (!is.null(quo_get_expr(add_params))) { + stop("Arguments must be passed to `add_params` in a list.", call.=FALSE) + } else { + add_params_args <- list() + } + + template <- get_layer_template(name) + + if (!inherits(template, "tplyr_layer_template")) { + stop("Invalid template - templates must be created using `new_layer_template()`", call.=FALSE) + } + + # Param checks + template_params <- attr(template, 'params') + + # Args provided not in template + invalid_args <- setdiff(names(add_params_args), template_params) + # Template params not in args + missing_args <- setdiff(template_params, names(add_params_args)) + + if (!is_empty(invalid_args)) { + bad_args <- paste0(invalid_args, collapse=", ") + stop( + sprintf("In use_template() the following parameters provided to add_params are invalid: %s", bad_args), + call.=FALSE + ) + } + + if (!is_empty(missing_args)) { + bad_args <- paste0(missing_args, collapse=", ") + stop( + sprintf("In use_template() the following parameters provided to add_params are missing: %s", bad_args), + call.=FALSE + ) + } + + # Based on the types provided in template_param, only run those 
replaces + template <- make_template(template, add_params_args) + + template_func <- eval(str2lang(paste0(c("function(..., add_params) {", template, "}"), collapse="\n"))) + + template_func(..., add_params = add_params_args) +} + +#' Process the template function text ready for evaluation +#' +#' In order for the template function to evaluate properly, names or calls must +#' be unquoted when they're passed into the layer modifying functions. +#' Otherwise, the template parameter function itself will be passed in and +#' quoted, failing type checks of the layer modifiers. This internal function +#' processes the template to add the template_params functions and unquote as +#' necessary. +#' +#' @param template Template function as a character string +#' @param add_params_args Argument list captured in use_template +#' +#' @return Template function text with template_param arguments inserted +#' +#' @noRd +make_template <- function(template, add_params_args) { + # Pick out the classes of each argument + arg_types <- map_chr(add_params_args, class) + + # Find the arguments that actually need to be unquoted and the ones that + # don't separately + quo_arg_names <- add_params_args[which(arg_types %in% c("name", "call"))] + oth_arg_names <- add_params_args[which(!(arg_types %in% c("name", "call")))] + + # Get those arguments names + quo_search_names <- names(quo_arg_names) + oth_search_names <- names(oth_arg_names) + + # This regex finds the text template_param(" based on the names + # supplied above, and creates a group around template_param so we can target + # that replacement + rx_str <- "\\{\\s+(%s)\\s+\\}" + quo_rx <- regex(sprintf(rx_str, + paste(quo_search_names, collapse="|"))) + oth_rx <- regex(sprintf(rx_str, + paste(oth_search_names, collapse="|"))) + + # Finally, replace only replace the curly bracket arguments with a call to + # template_param, unquoting for calls and names and not unquoting for + # other arguments. + out_template <- str_replace(template, quo_rx, "!!template_param(\'\\1\')") + str_replace(out_template, oth_rx, "template_param(\'\\1\')") +} + +#' Extract a parameter in a template context +#' +#' Beyond the group_ functions, templates need a method to hand parameters +#' from use_template down to other tplyr_layer modifier functions. Users supply +#' arguments to the template and no to the modifier functions themselves, so +#' those arguments need to be passed down. Furthermore, we don't want them to +#' have to think about quasiquotation. So the purpose of `template_param()` is +#' to abstract the handing of parameters away from the user and allow them to +#' just use the add_params parameter instead. +#' +#' @param param_name Parameter name specified in add_params of `use_template()` +#' +#' @return Extracted parameter +#' +#' @noRd +template_param <- function(param_name) { + # The caller_env() is where template_param() was called + # The parent of that environment is template_func(), which was created + # in use_template(). So we look back there for the add_params argument + # passed down from use_template + add_params <- env_parent(caller_env())[['add_params']] + # Pull the desired parameter out and return it + param <- add_params[[param_name]] + param +} + +#' @export +print.tplyr_layer_template <- function(x, ...) 
{ + cat(sprintf("Template name: %s\n", attr(x, 'template_name'))) + params <- attr(x, 'params') + if (is_empty(params)) { + params <- "None" + } + cat(sprintf("Template parameters: %s\n", paste(c(params), collapse=", "))) + cat("Template code:\n") + cat(x, "\n") +} diff --git a/R/layering.R b/R/layering.R index 4a316679..6a9e47c5 100644 --- a/R/layering.R +++ b/R/layering.R @@ -90,7 +90,6 @@ add_layer <- function(parent, layer, name=NULL) { #' @param parent A \code{tplyr_table} or \code{tplyr_layer}/\code{tplyr_subgroup_layer} object #' @param ... Layers to be added #' -#' @return #' @export #' #' @family Layer attachment diff --git a/R/meta-builders.R b/R/meta-builders.R new file mode 100644 index 00000000..941b679e --- /dev/null +++ b/R/meta-builders.R @@ -0,0 +1,245 @@ +#' Use available metadata to build the tplyr_meta object +#' +#' This is the main driver function, and layer specific variants +#' adapt on top of this function +#' +#' @param table_where Table level where filter +#' @param layer_where Layer level where filter +#' @param treat_grps Treatment groups from the tplyr_table parent environment +#' @param ... All grouping variables +#' +#' @return tplyr_meta object +#' @noRd +build_meta <- function(table_where, layer_where, treat_grps, variables, values) { + + # Make an assumption that the treatment variable was the first variable provided + values[[1]] <- translate_treat_grps(values[[1]], treat_grps) + + filters <- make_parsed_strings(variables, values) + + meta <- new_tplyr_meta( + names = variables, + filters = filters + ) + + meta <- meta %>% + add_filters_(layer_where) %>% + add_variables_(get_vars_from_filter(layer_where)) %>% + add_filters_(table_where) %>% + add_variables_(get_vars_from_filter(table_where)) + + meta +} + +#' Build metadata for desc_layers +#' +#' @param target Target variable currently being summarized +#' @param table_where Table level where filter +#' @param layer_where Layer level where filter +#' @param treat_grps Treatment groups from the tplyr_table parent environment +#' @param ... All grouping variables +#' +#' @return tplyr_meta object +#' @noRd +build_desc_meta <- function(target, table_where, layer_where, treat_grps, ...) { + + variables <- call_args(match.call()) + + # Don't want any of the named parameters here + variables <- variables[which(names(variables)=='')] + values <- list(...) + + # Get rid of text provided by variables + inds <- which(map_lgl(unname(variables), ~ quo_class(.) == "name")) + variables <- variables[inds] + values <- values[inds] + + # Output vector + meta <- vector('list', length(values[[1]])) + + # Vectorize across the input data + for (i in seq_along(values[[1]])) { + # Pull out the current row's values + cur_values <- map(values, ~ .x[i]) + # Build the tplyr_meta object + meta[[i]] <- build_meta(table_where, layer_where, treat_grps, variables, cur_values) %>% + add_variables_(target) + } + + meta +} + +#' Build metadata for count_layers +#' +#' @param target Target variable currently being summarized +#' @param table_where Table level where filter +#' @param layer_where Layer level where filter +#' @param treat_grps Treatment groups from the tplyr_table parent environment +#' @param ... All grouping variables +#' +#' @return tplyr_meta object +#' @noRd +build_count_meta <- function(layer, table_where, layer_where, treat_grps, summary_var, ...) { + + variables <- call_args(match.call()) + + # Don't want any of the named parameters here + variables <- variables[which(names(variables)=='')] + values <- list(...) 
+ + # Get rid of text provided by variables + inds <- which(map_lgl(unname(variables), ~ quo_class(.) == "name")) + variables <- variables[inds] + values <- values[inds] + + # The total row label may not pass through, so set it + total_row_label <- ifelse(is.null(layer$total_row_label), 'Total', layer$total_row_label) + count_missings <- ifelse(is.null(layer$count_missings), FALSE, layer$count_missings) + mlist <- layer$missing_count_list + + # If the outer layer was provided as a text variable, get value + character_outer <- get_character_outer(layer) + unnested_character <- is_unnested_character(layer) + + meta <- vector('list', length(values[[1]])) + + # Vectorize across the input data + for (i in seq_along(values[[1]])) { + + if (!unnested_character) { + add_vars <- layer$target_var + } else { + add_vars <- quos() + } + + row_filter <- list() + + # Pull out the current row's values + cur_values <- map(values, ~ .x[i]) + + # The outer layer will currently be NA for the outer layer summaries, so adjust the filter appropriately + if (any(is.na(cur_values))) { + + # Total row or outer layer + na_var <- variables[which(is.na(cur_values))] + + # work around outer letter being NA + filter_variables <- variables[which(!is.na(cur_values))] + filter_values <- cur_values[which(!is.na(cur_values))] + + if (summary_var[i] == total_row_label && !count_missings) { + # Filter out the missing counts if the total row should exclude missings + row_filter <- make_parsed_strings(layer$target_var, list(mlist), negate=TRUE) + } + else if (summary_var[i] %in% names(mlist)) { + # Get the values for the missing row + miss_val <- mlist[which(names(mlist) == summary_var[i])] + row_filter <- make_parsed_strings(layer$target_var, list(miss_val)) + } + else if (summary_var[i] != total_row_label) { + # Subset to outer layer value + row_filter <- make_parsed_strings(na_var, summary_var[i]) + } + + add_vars <- append(add_vars, na_var) + + } + else { + # Inside the nested layer + filter_variables <- variables + filter_values <- cur_values + + # Toss out the indentation + if (!is.null(layer$indentation) && str_starts(summary_var[i], layer$indentation)) { + summary_var[i] <- str_sub(summary_var[i], layer$indentation_length+1) + } + + if (summary_var[i] %in% names(mlist)) { + # Get the values for the missing row + miss_val <- mlist[which(names(mlist) == summary_var[i])] + row_filter <- make_parsed_strings(layer$target_var, list(miss_val)) + } + else if (summary_var[i] == total_row_label && !count_missings) { + # Filter out the missing counts if the total row should exclude missings + row_filter <- make_parsed_strings(layer$target_var, list(mlist), negate=TRUE) + } + else if (!is.na(character_outer) && summary_var[i] == character_outer) { + # If the outer layer is a character string then don't provide a filter + row_filter <- list() + } + else if (summary_var[i] != total_row_label && !unnested_character) { + # If we're not in a total row, build the filter + row_filter <- make_parsed_strings(layer$target_var, summary_var[i]) + } + } + + # Make the meta object + meta[[i]] <- build_meta(table_where, layer_where, treat_grps, filter_variables, filter_values) %>% + add_filters_(row_filter) %>% + add_variables_(add_vars) + + } + + meta +} + +#' Build metadata for risk difference comparisons +#' +#' @param meta A tplyr_metadata object +#' @param treat_var the treatment variable +#' @param comp The current rdiff comparison +#' +#' @return tplyr_meta object +#' @noRd +build_rdiff_meta <- function(meta, treat_var, comp){ + + for (i in 
seq_along(meta)) { + # Make a new filter that contains the current comparison being made + filt <- make_parsed_strings(list(treat_var), list(comp))[[1]] + # Add the filter in the spot where the treatment groups are held, + # which is always the first element (in a count layer) + meta[[i]]$filters[[1]] <- filt + } + + meta +} + +#' Build metadata for shift_layers +#' +#' @param target Target variable currently being summarized +#' @param table_where Table level where filter +#' @param layer_where Layer level where filter +#' @param treat_grps Treatment groups from the tplyr_table parent environment +#' @param ... All grouping variables +#' +#' @return tplyr_meta object +#' @noRd +build_shift_meta <- function(layer, table_where, layer_where, treat_grps, summary_var, ...) { + + variables <- call_args(match.call()) + + # Don't want any of the named parameters here + variables <- variables[which(names(variables)=='')] + values <- list(...) + + # Get rid of text provided by variables + inds <- which(map_lgl(unname(variables), ~ quo_class(.) == "name")) + variables <- variables[inds] + values <- values[inds] + + meta <- vector('list', length(values[[1]])) + + # Vectorize across the input data + for (i in seq_along(values[[1]])) { + + # Pull out the current row's values + cur_values <- map(values, ~ .x[i]) + + # Make the meta object + meta[[i]] <- build_meta(table_where, layer_where, treat_grps, variables, cur_values) %>% + add_variables_(layer$target_var$row) %>% + add_filters_(make_parsed_strings(layer$target_var['row'], list(summary_var[i]))) + } + + meta +} diff --git a/R/meta-helpers.R b/R/meta-helpers.R new file mode 100644 index 00000000..d789e44f --- /dev/null +++ b/R/meta-helpers.R @@ -0,0 +1,179 @@ +#' Return proper quoting for a given value +#' +#' This function returns whatever value should be necessary to +#' create the string for a value that will be parsed. For example, +#' in `x == 'hi'`, the value 'hi' must be quoted like a string. But +#' if the input variable is numeric, such as `x == 1`, the 1 should +#' not be provided in quotes. +#' +#' @param val Value which needs parsing +#' +#' @return A character string +#' @noRd +#' +#' @examples +#' +#' get_parse_string_value('hello') +#' get_parse_string_value(1) +get_parse_string_value <- function(val) { + if (class(val) %in% c('character', 'factor') && !is.na(val)) { + paste0('"', val, '"') + } else{ + val + } +} + +#' Convert supplied values into a string that will parse as a vector +#' +#' By passing in some vector, the text necessary to create that vector is returned. 
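# Minimal sketch of the idea these helpers build toward, not package code:
# values are rendered as the text of an R expression and then converted back
# into a language object that can later drive a filter. quote_if_character()
# and the TRT01A variable name are stand-ins invented for this illustration.
quote_if_character <- function(val) {
  if (is.character(val) || is.factor(val)) paste0('"', val, '"') else val
}

vals <- c("Placebo", "Xanomeline High Dose")
filter_text <- paste0(
  "TRT01A %in% c(",
  paste(vapply(vals, quote_if_character, character(1)), collapse = ", "),
  ")"
)
filter_text
str2lang(filter_text)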
+#' +#' @param values +#' +#' @return Character string +#' @noRd +#' +#' +#' @examples +#' +#' x <- make_vect_str(c(1,2,3)) +#' y <- parse(text = x) +#' eval(y) +#' +#' x <- make_vect_str(c('a', 'b', 'c')) +#' y <- parse(text = x) +#' eval(y) +make_vect_str <- function(values) { + inner <- paste0(map_chr(values, get_parse_string_value), collapse = ", ") + + paste(c('c(', inner, ')'), collapse = "") +} + +#' Create a parsed string necessary to create filter logic +#' +#' Given a symbol and values, this function will return an expression required +#' to subset the given variable to that set of values +#' +#' @param variables Variables to filter +#' @param values Values to be filtered +#' @param negate Negate the filter +#' +#' @noRd +make_parsed_strings <- function(variables, values, negate=FALSE) { + + out <- vector('list', length(variables)) + + for (i in seq_along(variables)) { + + vals <- values[[i]] + vname <- as_label(variables[[i]]) + + na_present <- any(is.na(vals)) + na_s <- paste0("is.na(",vname,")") + vals <- vals[which(!is.na(vals))] + + pre <- "" + post <- "" + preneg <- "" + + if (negate) { + na_s <- paste0("!", na_s) + eq <- "!=" + comb <- "&" + } else { + eq <- "==" + comb <- "|" + } + + # Store the NA string as output upfront + s <- na_s + + if (length(vals) >= 1) { + + if (length(vals) > 1) { + if (negate) { + pre <- "!(" + post <- ")" + } + opr <- "%in%" + } else{ + opr <- eq + } + + # Build the filter string and negate plurals if necessary + s <- paste0(pre, vname, " ", opr, " ", make_vect_str(vals), post) + + # Tack on NA's if necessary + if (na_present) { + s <- paste0(s, comb, na_s) + } + + } + + out[[i]] <- str2lang(s) + } + out +} + +#' Return the vector of treatment groups based on treatment column +#' +#' Given that sets of treatment groups can be combined, this function +#' allows you to get the original treatment groups back out of the specified +#' combination name +#' +#' @param value Specified treatment group +#' @param layer Tplyr layer +#' +#' @return A character vector of treatment groups +#' @noRd +translate_treat_grps <- function(value, treat_grps) { + out <- as.character(value) + if (out %in% names(treat_grps)) { + out <- treat_grps[[out]] + } + out +} + +#' Translate a filter expression to the symbols in the filter +#' +#' This function will return a list of symbols that are present +#' in a give filter expression +#' +#' @param f Filter expression +#' +#' @return List of symbols +#' @noRd +get_vars_from_filter <- function(f) { + syms(all.vars(quo_get_expr(f))) +} + +#' Extract value of outer layer text value +#' +#' @param layer A Tplyr layer object +#' +#' @return Single element character vector +#' @noRd +get_character_outer <- function(layer) { + qlist <- layer$target_var_saved + + if (!is.null(qlist) && !quo_is_symbol(qlist[[1]])) { + return(quo_get_expr(qlist[[1]])) + } else{ + return(NA_character_) + } +} + +#' Check if a layer is unnested with character target +#' +#' @param layer A Tplyr layer object +#' +#' @return Boolean +#' @noRd +is_unnested_character <- function(layer) { + unnested <- is.null(layer$target_var_saved) + + if (unnested) { + return(!quo_is_symbol(layer$target_var[[1]])) + } else{ + return(FALSE) + } +} diff --git a/R/meta.R b/R/meta.R new file mode 100644 index 00000000..78ccaa2f --- /dev/null +++ b/R/meta.R @@ -0,0 +1,228 @@ +#' Tplyr Metadata Object +#' +#' If a Tplyr table is built with the `metadata=TRUE` option specified, then +#' metadata is assembled behind the scenes to provide traceability on each +#' result cell 
derived. The functions `get_meta_result()` and +#' `get_meta_subset()` allow you to access that metadata by using an ID provided +#' in the row_id column and the column name of the result you'd like to access. +#' The purpose is of the row_id variable instead of a simple row index is to +#' provide a sort resistant reference of the originating column, so the output +#' Tplyr table can be sorted in any order but the metadata are still easily +#' accessible. +#' +#' The `tplyr_meta` object provided a list with two elements - names and +#' filters. The names contain every column from the target data.frame of the +#' Tplyr table that factored into the specified result cell, and the filters +#' contains all the necessary filters to subset the target data to create the +#' specified result cell. `get_meta_subset()` additionally provides a parameter to +#' specify any additional columns you would like to include in the returned +#' subset data frame. +#' +#' @param names List of symbols +#' @param filters List of expressions +#' +#' @return tplyr_meta object +#' @export +#' +#' @examples +#' +#' tplyr_meta( +#' names = rlang::quos(x, y, z), +#' filters = rlang::quos(x == 1, y==2, z==3) +#' ) +#' +tplyr_meta <- function(names=list(), filters=exprs()) { + meta <- new_tplyr_meta() + meta <- add_variables(meta, names) + meta <- add_filters(meta, filters) + meta +} + +#' Create a tplyr_meta object +#' +#' @return tplyr_meta object +#' @noRd +new_tplyr_meta <- function(names = list(), filters=exprs()) { + structure( + list( + names = names, + filters = filters + ), + class = 'tplyr_meta' + ) +} + +#' Add variables to a tplyr_meta object +#' +#' Add additional variable names to a `tplyr_meta()` object. +#' +#' @param meta A tplyr_meta object +#' @param names A list of names, providing variable names of interest. Provide +#' as a list of quosures using `rlang::quos()` +#' +#' @return tplyr_meta object +#' @md +#' +#' @family Metadata additions +#' @rdname metadata_additions +#' +#' @export +#' +#' @examples +#' +#' m <- tplyr_meta() +#' m <- add_variables(m, rlang::quos(a, b, c)) +#' m <- add_filters(m, rlang::quos(a==1, b==2, c==3)) +#' m +add_variables <- function(meta, names) { + + if (!all(map_lgl(names, ~ is_quosure(.) && quo_is_symbol(.)))) { + stop("Names must be provided as a list of names", call.=FALSE) + } + + if (!inherits(meta, 'tplyr_meta')) { + stop("meta must be a tplyr_meta object", call.=FALSE) + } + + add_variables_(meta, names) +} + +#' Internal application of variables onto tplyr_meta object +#' @noRd +add_variables_ <- function(meta, names) { + meta$names <- append(meta$names, names) + meta +} + +#' @param filters A list of symbols, providing variable names of interest. Provide +#' as a list of quosures using `rlang::quos()` +#' +#' @family Metadata additions +#' @rdname metadata_additions +#' +#' @export +add_filters <- function(meta, filters) { + + if (!all(map_lgl(filters, ~ is_quosure(.) 
&& quo_is_call(.)))) { + stop("Filters must be provided as a list of calls", call.=FALSE) + } + + if (!inherits(meta, 'tplyr_meta')) { + stop("meta must be a tplyr_meta object", call.=FALSE) + } + + add_filters_(meta, filters) +} + +#' Internal application of filters onto tplyr_meta object +#' @noRd +add_filters_ <- function(meta, filters) { + meta$filters <- append(meta$filters, filters) + meta +} + +#' Get the metadata dataframe from a tplyr_table +#' +#' Pull out the metadata dataframe from a tplyr_table to work with it directly +#' +#' @param t A Tplyr table with metadata built +#' +#' @return Tplyr metadata dataframe +#' @export +#' +#' @examples +#' t <- tplyr_table(mtcars, gear) %>% +#' add_layer( +#' group_desc(wt) +#' ) +#' +#' t %>% +#' build(metadata=TRUE) +#' +#' get_metadata(t) +get_metadata <- function(t) { + + if (!inherits(t, 'tplyr_table')) { + stop("t must be a tplyr_table object", call.=FALSE) + } + + if (is.null(t$metadata)){ + stop(paste( + "t does not contain a metadata dataframe.", + "Make sure the tplyr_table was built with `build(metadata=TRUE)`")) + } + + return(t$metadata) +} + +#' Append the Tplyr table metadata dataframe +#' +#' `append_metadata()` allows a user to extend the Tplyr metadata data frame +#' with user provided data. In some tables, Tplyr may be able to provided most +#' of the data, but a user may have to extend the table with other summaries, +#' statistics, etc. This function allows the user to extend the tplyr_table's +#' metadata with their own metadata content using custom data frames created +#' using the `tplyr_meta` object. +#' +#' As this is an advanced feature of Tplyr, ownership is on the user to make +#' sure the metadata data frame is assembled properly. The only restrictions +#' applied by `append_metadata()` are that `meta` must have a column named +#' `row_id`, and the values in `row_id` cannot be duplicates of any `row_id` +#' value already present in the Tplyr metadata dataframe. `tplyr_meta()` objects +#' align with constructed dataframes using the `row_id` and output dataset +#' column name. As such, `tplyr_meta()` objects should be inserted into a data +#' frame using a list column. +#' +#' +#' @param t A tplyr_table object +#' @param meta A dataframe fitting the specifications of the details section of +#' this function +#' +#' @return A tplyr_table object +#' @export +#' @md +#' +#' @examples +#' t <- tplyr_table(mtcars, gear) %>% +#' add_layer( +#' group_desc(wt) +#' ) +#' +#' t %>% +#' build(metadata=TRUE) +#' +#' m <- tibble::tibble( +#' row_id = c('x1_1'), +#' var1_3 = list(tplyr_meta(rlang::quos(a, b, c), rlang::quos(a==1, b==2, c==3))) +#' ) +#' +#' append_metadata(t, m) +append_metadata <- function(t, meta) { + + if (!('row_id' %in% names(meta))) { + stop("The provided metadata dataset must have a column named row_id", call.=FALSE) + } + + if (any(meta$row_id %in% t$metadata$row_id)) { + stop( + paste("row_id values in the provided metadata dataset are duplicates of", + "row_id values in the Tplyr metadata. All row_id values must be unique.", + call.=FALSE) + ) + } + + t$metadata <- bind_rows(t$metadata, meta) + t +} + +#' @export +print.tplyr_meta <- function(x, ...) 
{ + cat(sprintf("tplyr_meta: %d names, %d filters\n", length(x$names), length(x$filters))) + cat("Names:\n") + names <- map_chr(x$names, as_label) + filters <- map_chr(x$filters, as_label) + cat(" ", paste(names, collapse = ", "), "\n") + cat("Filters:\n") + cat(" ", paste(filters, collapse = ", "), "\n") + invisible() +} diff --git a/R/meta_utils.R b/R/meta_utils.R new file mode 100644 index 00000000..fabbbcbd --- /dev/null +++ b/R/meta_utils.R @@ -0,0 +1,176 @@ +#' Extract the result metadata of a Tplyr table +#' +#' Given a row_id value and a result column, this function will return the +#' tplyr_meta object associated with that 'cell'. +#' +#' If a Tplyr table is built with the `metadata=TRUE` option specified, then +#' metadata is assembled behind the scenes to provide traceability on each +#' result cell derived. The functions `get_meta_result()` and +#' `get_meta_subset()` allow you to access that metadata by using an ID provided +#' in the row_id column and the column name of the result you'd like to access. +#' The purpose is of the row_id variable instead of a simple row index is to +#' provide a sort resistant reference of the originating column, so the output +#' Tplyr table can be sorted in any order but the metadata are still easily +#' accessible. +#' +#' The `tplyr_meta` object provided a list with two elements - names and +#' filters. The metadata contain every column from the target data.frame of the +#' Tplyr table that factored into the specified result cell, and the filters +#' contains all the necessary filters to subset to data summarized to create the +#' specified result cell. `get_meta_subset()` additionally provides a parameter to +#' specify any additional columns you would like to include in the returned +#' subset data frame. +#' +#' @param x A built Tplyr table or a dataframe +#' @param row_id The row_id value of the desired cell, provided as a character +#' string +#' @param column The result column of interest, provided as a character string +#' @param ... additional arguments +#' +#' @return A tplyr_meta object +#' @md +#' +#' @export +#' +#' @examples +#' t <- tplyr_table(mtcars, cyl) %>% +#' add_layer( +#' group_desc(hp) +#' ) +#' +#' dat <- t %>% build(metadata = TRUE) +#' +#' get_meta_result(t, 'd1_1', 'var1_4') +#' +#' m <- t$metadata +#' dat <- t$target +#' +#' get_meta_result(t, 'd1_1', 'var1_4') +get_meta_result <- function(x, row_id, column, ...) { + UseMethod("get_meta_result") +} + +#' @export +get_meta_result.tplyr_table <- function(x, row_id, column, ...) { + m <- x$metadata + + get_meta_result.data.frame(m, row_id, column) +} + +#' @export +get_meta_result.data.frame <- function(x, row_id, column, ...) { + if (!inherits(row_id, 'character') || !(row_id %in% x$row_id)) { + stop('Invalid row_id selected. 
row_id must be provided as a string present in built Tplyr table.', + call.=FALSE) + } + + if (!inherits(column, 'character') || !(column %in% names(x))) { + stop(paste0('column must provided as a character string and a valid result ', + 'column present in the built Tplyr dataframe'), call.=FALSE) + } + + # Pull out the cell of interest + res <- x[[which(x$row_id == row_id), column]][[1]] + + if (!inherits(res, 'tplyr_meta')) { + stop('Specified column must be a result column', call.=FALSE) + } + + res +} + +#' Extract the subset of data based on result metadata +#' +#' Given a row_id value and a result column, this function will return the +#' subset of data referenced by the tplyr_meta object associated with that +#' 'cell', which provides traceability to tie a result to its source. +#' +#' If a Tplyr table is built with the `metadata=TRUE` option specified, then +#' metadata is assembled behind the scenes to provide traceability on each +#' result cell derived. The functions `get_meta_result()` and +#' `get_meta_subset()` allow you to access that metadata by using an ID provided +#' in the row_id column and the column name of the result you'd like to access. +#' The purpose is of the row_id variable instead of a simple row index is to +#' provide a sort resistant reference of the originating column, so the output +#' Tplyr table can be sorted in any order but the metadata are still easily +#' accessible. +#' +#' The `tplyr_meta` object provided a list with two elements - names and +#' filters. The metadata contain every column from the target data.frame of the +#' Tplyr table that factored into the specified result cell, and the filters +#' contains all the necessary filters to subset to data summarized to create the +#' specified result cell. `get_meta_subset()` additionally provides a parameter +#' to specify any additional columns you would like to include in the returned +#' subset data frame. +#' +#' @param x A built Tplyr table or a dataframe +#' @param row_id The row_id value of the desired cell, provided as a character +#' string +#' @param column The result column of interest, provided as a character string +#' @param add_cols Additional columns to include in subset data.frame output +#' @param target A data frame to be subset (if not pulled from a Tplyr table) +#' @param ... additional arguments +#' +#' @return A data.frame +#' @rdname get_meta_subset +#' @md +#' +#' @export +#' +#' @examples +#' t <- tplyr_table(mtcars, cyl) %>% +#' add_layer( +#' group_desc(hp) +#' ) +#' +#' +#' dat <- t %>% build(metadata = TRUE) +#' +#' get_meta_subset(t, 'd1_1', 'var1_4', add_cols = dplyr::vars(carb)) +#' +#' m <- t$metadata +#' dat <- t$target +#' +#' get_meta_subset(t, 'd1_1', 'var1_4', add_cols = dplyr::vars(carb), target = target) +get_meta_subset <- function(x, row_id, column, add_cols = vars(USUBJID), ...) { + UseMethod("get_meta_subset") +} + +#' @export +#' @rdname get_meta_subset +get_meta_subset.data.frame <- function(x, row_id, column, + add_cols = vars(USUBJID), target = NULL, ...) 
{ + # Get the metadata object ready + m <- get_meta_result(x, row_id, column) + + if (!inherits(add_cols, 'quosures')) { + stop("add_cols must be provided using `dplyr::vars()`", call.=FALSE) + } + + # Subset and return the data + if (is.null(target)) { + stop("If querying metadata without a tplyr_table, a target must be provided", call.=FALSE) + } + + target %>% + filter(!!!m$filters) %>% + select(!!!add_cols, !!!m$names) +} + +#' @export +#' @rdname get_meta_subset +get_meta_subset.tplyr_table <- function(x, row_id, column, add_cols = vars(USUBJID), ...) { + + # Get the metadata object ready + m <- get_meta_result(x, row_id, column) + + if (!inherits(add_cols, 'quosures')) { + stop("add_cols must be provided using `dplyr::vars()`", call.=FALSE) + } + + # Subset and return the data + x$target %>% + filter(!!!m$filters) %>% + select(!!!add_cols, !!!m$names) +} + diff --git a/R/nested.R b/R/nested.R index de15a640..322b1588 100644 --- a/R/nested.R +++ b/R/nested.R @@ -5,8 +5,6 @@ process_nested_count_target <- function(x) { if(is.null(indentation)) indentation <- " " - - assert_that(quo_is_symbol(target_var[[2]]), msg = "Inner layers must be data driven variables") @@ -29,6 +27,10 @@ process_nested_count_target <- function(x) { immediate. = TRUE) } + if (isTRUE(include_total_row)) { + abort("You can't include total rows in nested counts. Instead, add a seperate layer for total counts.") + } + if (!is.null(denoms_by)) { change_denom_ind <- map_chr(denoms_by, as_name) %in% "summary_var" second_denoms_by <- denoms_by @@ -47,14 +49,33 @@ process_nested_count_target <- function(x) { set_denoms_by(!!!second_denoms_by)) first_layer_final <- first_layer$numeric_data - # add_column(!!target_var[[1]] := .[["summary_var"]]) second_layer_final <- second_layer$numeric_data %>% + filter_numeric( + numeric_cutoff = numeric_cutoff, + numeric_cutoff_stat = numeric_cutoff_stat, + numeric_cutoff_column = numeric_cutoff_column, + treat_var = treat_var + ) %>% group_by(!!target_var[[1]]) %>% do(filter_nested_inner_layer(., target, target_var[[1]], target_var[[2]], indentation)) + ignored_filter_rows <- ifelse(include_total_row, + ifelse(is.null(total_row_label), + "Total", + total_row_label), + character(0)) + # Bind the numeric data together - numeric_data <- bind_rows(first_layer_final, second_layer_final) + numeric_data <- bind_rows(first_layer_final, second_layer_final) %>% + filter_nested_numeric( + numeric_cutoff, + numeric_cutoff_stat, + numeric_cutoff_column, + treat_var, + target_var, + ignored_filter_rows + ) # Save the original by and target_vars incase the layer is rebuilt by_saved <- by @@ -109,3 +130,30 @@ refresh_nest <- function(x) { env_bind(x, by = env_get(x, "by_saved")) env_bind(x, target_var = env_get(x, "target_var_saved")) } + + +filter_nested_numeric <- function(.data, + numeric_cutoff, + numeric_cutoff_stat, + numeric_cutoff_column, + treat_var, + target_var, + ignored_rows) { + + if (is.null(numeric_cutoff)) { + return(.data) + } + + # All of the non NA values in target_var[[1]] are what we want to keep here. + # Because those are the 'inner' values that passed the filter + vals <- .data %>% + {if (is.null(numeric_cutoff_column)) . 
else filter(., !!treat_var == numeric_cutoff_column)} %>% + filter(!is.na(!!target_var[[1]])) %>% + extract2(as_name(target_var[[1]])) %>% + as.character() + + .data %>% + filter(!is.na(!!target_var[[1]]) | summary_var %in% c(vals, ignored_rows)) + + +} diff --git a/R/precision.R b/R/precision.R index fdf42168..69a8255e 100644 --- a/R/precision.R +++ b/R/precision.R @@ -3,7 +3,7 @@ #' @param v A vector of character strings #' @param side 1 = Integer, 2 = Decimal #' -#' @return A vector of lenths for the specified field +#' @return A vector of lengths for the specified field #' @noRd nchar_unit <- function(v, side) { @@ -30,6 +30,7 @@ nchar_unit <- function(v, side) { #' @param .data Data precision is calculated from #' @param precision_by Precision by variables - defaulted to the layer by #' @param precision_on Precision on variable - defaulted to first target_var variable +#' @param cap Capped precision passed in from set_format_strings #' #' @return A tibble look-up table with the precision_by variables, a variable for the #' maximum integer length (max_int), and the maximum decimal length (max_dec). @@ -49,3 +50,65 @@ make_prec_data <- function(.data, precision_by, precision_on, cap) { precision_on = as_label(precision_on) ) } + +#' Check and return the provided precision lookup table +#' +#' This function is called to extract the precision data if the user manually +#' provided a precision look up table. This functions assumes that +#' set_precision_data has been run on the layer. +#' +#' If the user specified default='auto', then the prec_error parameter in the layer +#' metadata is set to auto. When set to 'error', the table build will error if there +#' are by variable cases found for which the user did not provide precision. When set +#' to 'auto', these cases will be passed into make_prec_data, which will auto-calculate +#' the precision based on the built_target data. +#' +#' @param built_target Data precision is calculated from +#' @param prec External precision dataset +#' @param precision_by Precision by variables - pulled from the provided precision data +#' @param precision_on Precision on variable - defaulted to first target_var variable +#' @param cap Capped precision passed in from set_format_strings +#' @param prec_error How should unspecified cases be handled? +#' +#' @return A tibble look-up table with the precision_by variables, a variable for the +#' maximum integer length (max_int), and the maximum decimal length (max_dec). +#' +#' @return A tibble look-up table with the precision_by variables, a variable for the +#' maximum integer length (max_int), and the maximum decimal length (max_dec). +#' @noRd +get_prec_data <- function(built_target, prec, precision_by, precision_on, cap, prec_error) { + + # Do the types match between the prec data and the built target? + prec_types <- map_chr(precision_by, ~ class(prec[[as_label(.)]])) + data_types <- map_chr(precision_by, ~ class(built_target[[as_label(.)]])) + + assert_that( + all(prec_types == data_types), + msg = "By variable types mismatch between precision dataset and target data" + ) + + # What's in the data? + precision_by_cases <- built_target %>% + distinct(!!!precision_by) + + # What's missing from the provided table? + mismatches <- anti_join(precision_by_cases, prec, by = map_chr(precision_by, as_label)) + + if (prec_error == "error" && nrow(mismatches) > 0) { + stop('The precision data provided is missing by variable cases:\n', + paste(capture.output(print(mismatches)), collapse = "\n"), + call. 
= FALSE) + } else if (prec_error == "auto" && nrow(mismatches) > 0) { + message('Unhandled precision cases were found - calculating precision based on source data') + subset_target <- left_join(mismatches, built_target, by = map_chr(precision_by, as_label)) + auto_prec <- make_prec_data(subset_target, precision_by, precision_on, cap) + prec <- bind_rows(prec, auto_prec) + } + + prec_on <- as_label(precision_on) + + prec %>% + mutate( + precision_on = as_name(prec_on) + ) +} diff --git a/R/process_metadata.R b/R/process_metadata.R new file mode 100644 index 00000000..2ea1ecf9 --- /dev/null +++ b/R/process_metadata.R @@ -0,0 +1,232 @@ +#' Process metadata for a layer of type \code{desc} +#' +#' @param x Layer object +#' +#' @return Nothing +#' @export +#' @noRd +process_metadata.desc_layer <- function(x, ...) { + + evalq({ + # meta_sums store the metadata table built alongside trans_sums + meta_sums <- vector("list", length(target_var)) + form_meta <- vector("list", length(target_var)) + + for (i in seq_along(target_var)) { + cur_var <- target_var[[i]] + + # Prepare metadata table + meta_sum <- num_sums_raw[[i]] %>% + group_by(!!treat_var, !!!by, !!!cols) %>% + group_keys() %>% + # rowwise() %>% + mutate( + meta = build_desc_meta(cur_var, table_where, where, treat_grps, !!treat_var, !!!by, !!!cols) + ) + + + # Join meta table with the transposed summaries ready for formatting + meta_sums[[i]] <- trans_sums[[i]] %>% + select(!!treat_var, match_exact(by), !!!cols, row_label) %>% + left_join(meta_sum, by=c(as_label(treat_var), match_exact(by), match_exact(cols))) + + if (stats_as_columns) { + # Transpose the metadata identical to the summary + form_meta[[i]] <- meta_sums[[i]] %>% + pivot_wider(id_cols=c(!!treat_var, match_exact(by)), + names_from = match_exact(vars(row_label, !!!cols)), + names_prefix = paste0('var', i, "_"), + values_from = meta + ) + } else { + form_meta[[i]] <- meta_sums[[i]] %>% + pivot_wider(id_cols=c('row_label', match_exact(by)), + names_from = match_exact(vars(!!treat_var, !!!cols)), + names_prefix = paste0('var', i, "_"), + values_from = meta + ) + } + + } + + if (stats_as_columns) { + formatted_meta <- reduce(form_meta, full_join, by=c(as_label(treat_var), match_exact(by))) + formatted_meta <- replace_by_string_names(formatted_meta, by, treat_var) + } else { + formatted_meta <- reduce(form_meta, full_join, by=c('row_label', match_exact(by))) + formatted_meta <- replace_by_string_names(formatted_meta, by) + } + + formatted_meta <- assign_row_id(formatted_meta, 'd') + + }, envir=x) + + env_get(x, "formatted_meta") + +} + +#' Process metadata for a layer of type \code{count} +#' +#' @param x Layer object +#' +#' @return Nothing +#' @export +#' @noRd +process_metadata.count_layer <- function(x, ...) 
{ + + evalq({ + layer <- current_env() + + # Build up the metadata for the count layer + meta_sum <- numeric_data %>% + mutate( + meta = build_count_meta( + layer, + table_where, + where, + treat_grps, + summary_var, + !!treat_var, + !!!by, + !!!cols + ) + ) + + # Pivot the meta table + formatted_meta <- meta_sum %>% + pivot_wider(id_cols = c(match_exact(by), "summary_var"), + names_from = c(!!treat_var, match_exact(cols)), values_from = meta, + names_prefix = "var1_") %>% + replace_by_string_names(quos(!!!by, summary_var)) + + if (is_built_nest) { + row_labels_meta <- vars_select(names(formatted_meta), starts_with("row_label")) + formatted_meta[is.na(formatted_meta[[1]]), 1] <- formatted_meta[is.na(formatted_meta[[1]]), + tail(row_labels, 1)] + } + + if (!is_empty(stats)) { + formatted_stats_metadata <- map(stats, process_metadata) %>% + reduce(full_join, by = c('summary_var', match_exact(c(by, head(target_var, -1))))) %>% + # Replace the by variables and target variable names with `row_label` + replace_by_string_names(quos(!!!by, summary_var)) + + formatted_meta <- full_join(formatted_meta, formatted_stats_metadata, + by = vars_select(names(formatted_meta), starts_with("row_label"))) + + } + + # Attach the row identifier + formatted_meta <- assign_row_id(formatted_meta, 'c') + + }, envir=x) + + env_get(x, "formatted_meta") + +} + +#' Process metadata for a layer of type \code{count} +#' +#' @param x Layer object +#' +#' @return Nothing +#' @export +#' @noRd +process_metadata.tplyr_riskdiff <- function(x, ...) { + + evalq({ + stats_meta <- vector('list', length(comparisons)) + + for (i in seq_along(comparisons)) { + + # Weird looking, but this will give me just the variables needed + stats_meta[[i]] <- meta_sum %>% + select(-!!treat_var, -any_of(c('n', 'distinct_n', 'distinct_total', 'total'))) %>% + mutate( + meta = build_rdiff_meta(meta, treat_var, comparisons[[i]]) + ) + + # Rename the meta variable + names(stats_meta[[i]])[ncol(stats_meta[[i]])] <- paste(c("rdiff", comparisons[[i]]), collapse = "_") + + } + + # Join the rdiff columns together + formatted_stats_meta <- reduce(stats_meta, + full_join, + by=c(match_exact(c(by, cols, head(target_var, -1))), 'summary_var')) %>% + distinct() + + if (length(cols) > 0) { + + # If only one comparison was made, the columns won't prefix with the transposed variable name + # So trick it by introducing a column I can drop later. Not great, but functional + formatted_stats_meta['rdiffx'] <- '' + + # Pivot by column + formatted_stats_meta <- formatted_stats_meta %>% + pivot_wider(id_cols=c(match_exact(c(by, head(target_var, -1))), 'summary_var'), + names_from = match_exact(cols), + names_sep = "_", + values_from=starts_with('rdiff')) + + # Drop the dummied columns + formatted_stats_meta <- formatted_stats_meta %>% select(-starts_with('rdiffx')) + + } + + # Handle the outer layer being NA for the outer layer + if (is_built_nest) { + formatted_stats_meta <- formatted_stats_meta %>% + mutate( + !!by[[1]] := if_else(is.na(!!by[[1]]), summary_var, as.character(!!by[[1]])) + ) + } + + }, envir=x) + + env_get(x, "formatted_stats_meta") + +} + +#' Process metadata for a layer of type \code{shift} +#' +#' @param x Layer object +#' +#' @return Nothing +#' @export +#' @noRd +process_metadata.shift_layer <- function(x, ...) 
{ + evalq({ + + layer <- current_env() + + # Build up the metadata for the count layer + formatted_meta <- numeric_data %>% + mutate( + meta = build_shift_meta( + layer, + table_where, + where, + treat_grps, + summary_var, + !!treat_var, + !!!by, + !!!cols, + !!target_var$column + ) + ) %>% + # Pivot table + pivot_wider(id_cols = c(match_exact(by), "summary_var"), + names_from = c( !!treat_var, !!target_var$column, match_exact(cols)), + values_from = meta, + names_prefix = "var1_") %>% + replace_by_string_names(quos(!!!by, summary_var)) + + # Attach the row identifier + formatted_meta <- assign_row_id(formatted_meta, 's') + + }, envir=x) + + env_get(x, "formatted_meta") +} diff --git a/R/riskdiff.R b/R/riskdiff.R index 6cf1735c..9cbd53f2 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -124,6 +124,15 @@ add_risk_diff <- function(layer, ..., args=list(), distinct=TRUE) { assert_that(all(names(args) %in% c('p', 'alternative', 'conf.level', 'correct')), msg = "All arguments provided via `args` must be valid arguments of `prop.test`") + + for (comp in comps) { + assert_that(!any(duplicated(comp)), + msg = paste("Comparison", + paste0("{",comp[1], ", ",comp[2],"}"), + "has duplicated values. Comparisons must not be duplicates") + ) + } + # Risk diff must be run on count layers assert_that(inherits(layer, 'count_layer'), msg = "Risk difference can only be applied to a count layer.") @@ -133,7 +142,7 @@ add_risk_diff <- function(layer, ..., args=list(), distinct=TRUE) { layer, comparisons = comps, args = args, - distinct = distinct + comp_distinct = distinct ), class=c("tplyr_statistic", "tplyr_riskdiff") ) @@ -170,18 +179,18 @@ prep_two_way <- function(comp) { two_way <- numeric_data # Nested layers need to plug the NAs left over - needs revision in the future - if (is_built_nest) { + if (is_built_nest && quo_is_symbol(by[[1]])) { two_way <- two_way %>% # Need to fill in NAs in the numeric data that # are patched later in formatting mutate( - !!by[[1]] := ifelse(is.na(!!by[[1]]), summary_var, !!by[[1]]) + !!by[[1]] := if_else(is.na(!!by[[1]]), summary_var, as.character(!!by[[1]])) ) } # If distinct is set and distinct values are there, use them - if (distinct == TRUE && any(str_detect(names(two_way), 'distinct'))) { + if (comp_distinct && !is.null(distinct_by)) { two_way <- two_way %>% select(-n, -total) %>% rename(n = distinct_n, total = distinct_total) diff --git a/R/shift.R b/R/shift.R index 6ed83ff2..e46d9cc1 100644 --- a/R/shift.R +++ b/R/shift.R @@ -105,6 +105,7 @@ prepare_format_metadata.shift_layer <- function(x) { process_formatting.shift_layer <- function(x, ...) { evalq({ + formatted_data <- numeric_data %>% # Mutate value based on if there is a distinct_by mutate(n = construct_shift_string(.n=n, .total = total, @@ -117,6 +118,8 @@ process_formatting.shift_layer <- function(x, ...) { values_from = n, names_prefix = "var1_") %>% replace_by_string_names(quos(!!!by, summary_var)) + + formatted_data <- assign_row_id(formatted_data, 's') }, envir = x) add_order_columns(x) diff --git a/R/shift_bindings.R b/R/shift_bindings.R index 8519890f..0a4891a3 100644 --- a/R/shift_bindings.R +++ b/R/shift_bindings.R @@ -43,6 +43,8 @@ set_denoms_by <- function(e, ...) { UseMethod("set_denoms_by") } +#' @export +#' @noRd set_denoms_by.shift_layer <- function(e, ...) { dots <- vars(...) @@ -59,7 +61,7 @@ set_denoms_by.shift_layer <- function(e, ...) 
{ msg = "A denom_by wasn't found as a grouping variable in the layer/table.") # If the row variable is here, rename it to summary_var - if(as_name(target_var$row) %in% dots_chr) { + if (as_name(target_var$row) %in% dots_chr) { dots[[which(dots_chr %in% as_name(target_var$row))]] <- quo(summary_var) } diff --git a/R/sort.R b/R/sort.R index d1a1879c..d4168df3 100644 --- a/R/sort.R +++ b/R/sort.R @@ -161,6 +161,7 @@ add_order_columns.count_layer <- function(x) { evalq({ + if(nrow(formatted_data) == 0) return(formatted_data) if(!exists("break_ties")) break_ties <- NULL # Set all defaults for ordering @@ -202,7 +203,7 @@ add_order_columns.count_layer <- function(x) { all_outer <- numeric_data %>% filter(!!!filter_logic) %>% - extract(1:outer_number, ) + extract(1:min(nrow(.), outer_number), ) # Add the ordering of the pieces in the layer formatted_data <- formatted_data %>% @@ -218,12 +219,15 @@ add_order_columns.count_layer <- function(x) { filter_logic = filter_logic, indentation = indentation, outer_inf = outer_inf, - break_ties = break_ties)) %>% + break_ties = break_ties, + numeric_cutoff = numeric_cutoff, + numeric_cutoff_stat = numeric_cutoff_stat, + numeric_cutoff_column = numeric_cutoff_column)) %>% ungroup() if (!is.null(nest_count) && nest_count) { # If the table nest should be collapsed into one row. - row_label_names <- vars_select(names(formatted_data), starts_with("row")) + row_label_names <- vars_select(names(formatted_data), starts_with("row_label")) # Remove first row formatted_data[, 1] <- NULL # Rename row labels @@ -256,7 +260,6 @@ add_order_columns.count_layer <- function(x) { })) } - formatted_data[, paste0("ord_layer_", formatted_col_index)] <- get_data_order(current_env(), formatted_col_index) # If there is a total row that is missing some ord values, they should fall @@ -430,7 +433,10 @@ get_data_order <- function(x, formatted_col_index) { treat_var, by, cols, result_order_var, target_var, missing_index, missing_sort_value, total_index, total_row_sort_value, - break_ties = break_ties) + break_ties = break_ties, + numeric_cutoff = numeric_cutoff, + numeric_cutoff_stat = numeric_cutoff_stat, + numeric_cutoff_column = numeric_cutoff_column) } else if (order_count_method == "byvarn") { @@ -526,7 +532,10 @@ get_data_order_bycount <- function(numeric_data, ordering_cols, treat_var, by, cols, result_order_var, target_var, missing_index = NULL, missing_sort_value = NULL, total_index = NULL, total_row_sort_value = NULL, - break_ties) { + break_ties, numeric_cutoff, numeric_cutoff_stat, + numeric_cutoff_column, nested = FALSE) { + + if (nrow(numeric_data) == 0) return(numeric()) # Make sure that if distinct_n is selected by set_result_order_var, that # there's a distinct variable in the numeric dataset @@ -568,6 +577,11 @@ get_data_order_bycount <- function(numeric_data, ordering_cols, ## WARNING: This has to be the same logic as the pivot in the count ordering or else it won't work numeric_ordering_data <- numeric_data %>% + {if (nested) . else filter_numeric(., + numeric_cutoff, + numeric_cutoff_stat, + numeric_cutoff_column, + treat_var)} %>% filter(!!!filter_logic) %>% # Sometimes row numbers are needed for nested counts if a value in the first @@ -697,7 +711,11 @@ add_data_order_nested <- function(group_data, final_col, numeric_data, ...) 
{ all_outer$..index <- all_outer %>% get_data_order_bycount(ordering_cols, treat_var, vars(!!!head(by, -1)), cols, result_order_var, vars(!!by[[1]], !!target_var), - break_ties = break_ties) + break_ties = break_ties, + numeric_cutoff = numeric_cutoff, + numeric_cutoff_stat = numeric_cutoff_stat, + numeric_cutoff_column = numeric_cutoff_column, + nested = TRUE) group_data[, paste0("ord_layer_", final_col)] <- all_outer %>% filter(summary_var == outer_value) %>% @@ -705,10 +723,11 @@ add_data_order_nested <- function(group_data, final_col, numeric_data, ...) { select(..index) } + present_vars <- unlist(group_data[-1, row_label_vec[length(row_label_vec)]]) ##### Inner nest values ##### filtered_numeric_data <- numeric_data %>% # Only include the parts of the numeric data that is in the current label - filter(numeric_data$summary_var %in% unlist(group_data[-1, row_label_vec[length(row_label_vec)]]), !is.na(!!by[[1]])) %>% + filter(numeric_data$summary_var %in% present_vars, !is.na(!!by[[1]])) %>% # Remove nesting prefix to prepare numeric data. mutate(summary_var := str_sub(summary_var, indentation_length)) @@ -722,14 +741,20 @@ add_data_order_nested <- function(group_data, final_col, numeric_data, ...) { group_data[1, paste0("ord_layer_", final_col + 1)] <- ifelse((is.null(outer_inf) || outer_inf), Inf, -Inf) if(tail(order_count_method, 1) == "bycount") { - group_data[-1 , paste0("ord_layer_", final_col + 1)] <- get_data_order_bycount(filtered_numeric_data, - ordering_cols, - treat_var, - head(by, -1), - cols, - result_order_var, - target_var, - break_ties = break_ties) + if (nrow(group_data) > 1) { + group_data[-1 , paste0("ord_layer_", final_col + 1)] <- get_data_order_bycount(filtered_numeric_data, + ordering_cols, + treat_var, + head(by, -1), + cols, + result_order_var, + target_var, + break_ties = break_ties, + numeric_cutoff = numeric_cutoff, + numeric_cutoff_stat = numeric_cutoff_stat, + numeric_cutoff_column = numeric_cutoff_column, + nested = TRUE) + } } else if(tail(order_count_method, 1) == "byvarn") { varn_df <- get_varn_values(target, target_var[[1]]) @@ -745,7 +770,9 @@ add_data_order_nested <- function(group_data, final_col, numeric_data, ...) { } else { - group_data[-1, paste0("ord_layer_", final_col + 1)] <- 1:nrow(group_data[-1,]) + group_row_count <- nrow(group_data[-1,]) + # Logic for group_row_count is when numeric_where values cause unexpected results + group_data[-1, paste0("ord_layer_", final_col + 1)] <- 1:ifelse(group_row_count == 0, 1, group_row_count) } diff --git a/R/stats.R b/R/stats.R index d55d604a..02d3735e 100644 --- a/R/stats.R +++ b/R/stats.R @@ -117,13 +117,14 @@ process_statistic_formatting.tplyr_riskdiff <- function(x, ...) { by=c(match_exact(c(by, cols, head(target_var, -1))), 'summary_var')) if (length(cols) > 0) { + # If only one comparison was made, the columns won't prefix with the transposed variable name # So trick it by introducing a column I can drop later. 
Not great, but functional formatted_statistic_data['rdiffx'] <- '' # Pivot by column formatted_statistic_data <- formatted_statistic_data %>% - pivot_wider(id_cols=c(match_exact(c(by, cols, head(target_var, -1))), 'summary_var'), + pivot_wider(id_cols=c(match_exact(c(by, head(target_var, -1))), 'summary_var'), names_from = match_exact(cols), names_sep = "_", values_from=starts_with('rdiff')) diff --git a/R/str_indent_wrap.R b/R/str_indent_wrap.R new file mode 100644 index 00000000..cf53f100 --- /dev/null +++ b/R/str_indent_wrap.R @@ -0,0 +1,104 @@ +#' Wrap strings to a specific width with hyphenation while preserving +#' indentation +#' +#' `str_indent_wrap()` leverages `stringr::str_wrap()` under the hood, but takes +#' some extra steps to preserve any indentation that has been applied to a +#' character element, and use hyphenated wrapping of single words that run +#' longer than the allotted wrapping width. +#' +#' The function `stringr::str_wrap()` is highly efficient, but in the +#' context of table creation there are two select features missing - hyphenation +#' for long running strings that overflow width, and respect for pre-indentation +#' of a character element. For example, in an adverse event table, you may have +#' body system rows as an un-indented column, and preferred terms as indented +#' columns. These strings may run long and require wrapping to not surpass the +#' column width. Furthermore, for crowded tables a single word may be longer +#' than the column width itself. +#' +#' This function takes steps to resolve these two issues, while trying to +#' minimize additional overhead required to apply the wrapping of strings. +#' +#' Note: This function automatically converts tabs to spaces. Tab width varies +#' depending on font, so width cannot automatically be determined within a data +#' frame. 
As such, users can specify the number of spaces each tab should be converted to via the tab_width argument. +#' +#' @param x An input character vector +#' @param width The desired width of elements within the output character vector +#' @param tab_width The number of spaces to which tabs should be converted +#' +#' @return A character vector with string wrapping applied +#' @export +#' @md +#' +#' @examples +#' ex_text1 <- c("RENAL AND URINARY DISORDERS", " NEPHROLITHIASIS") +#' ex_text2 <- c("RENAL AND URINARY DISORDERS", "\tNEPHROLITHIASIS") +#' +#' cat(paste(str_indent_wrap(ex_text1, width=8), collapse="\n\n"),"\n") +#' cat(paste(str_indent_wrap(ex_text2, tab_width=4), collapse="\n\n"),"\n") +str_indent_wrap <- function(x, width=10, tab_width=5) { + + if (!inherits(x, 'character')) { + stop('x must be a character vector', call.=FALSE) + } + + # Scan out tabs and convert them to spaces + x <- str_replace_all(x, "\\t", strrep(" ", tab_width)) + + # Find where the splits need to happen + sections <- str_locate_all(x, paste0("\\w{", width-1, "}(?=\\w)")) + + # Using the locations, build up the matrix of substrings + split_mat <- map(sections, ~ matrix(c(1, .[,2]+1, .[,2], -1), ncol=2)) + + # Divide the string into the necessary chunks + splits <- map2(x, split_mat, str_sub) + + hyph_str <- map_chr(splits, paste, collapse = "- ") + + # Get the indentation of the strings and make a data frame + wrap_df <- tibble( + l = get_ind_len(x), + w = width - l, + s = hyph_str + ) + + # Group by width and length, wrap the strings, and return the output vector + wrap_df %>% + group_by(w, l) %>% + mutate( + out = grouped_str_wrap(s) + ) %>% + pull(out) +} + +#' Get the indentation length +#' +#' Vectorized approach to extracting the length of indentation, with +#' compensation for NAs +#' +#' @param s Input string to have indentation length measured +#' +#' @return Integer vector of character length of indentation +#' @noRd +get_ind_len <- function(s) { + inds <- str_extract(s, "^\\s+") + inds[which(is.na(inds))] <- "" + map_int(inds, nchar, type = "width") +} + +#' Proper application of str_wrap in a grouped context +#' +#' Width, indent, and exdent all need single elements, so str_wrap doesn't work +#' well in a grouped mutate context through a data frame. So using the grouped +#' structure, this pulls out the current group and calls str_wrap using the single element +#' integers for width, indent, and exdent +#' +#' @param s Input character vector string to wrap +#' +#' @return Character vector of wrapped strings +#' @noRd +grouped_str_wrap <- function(s) { + g <- cur_group() + str_wrap(s, width = g$w, indent = g$l, exdent = g$l) +} diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100644 index 00000000..fd0b1d13 --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,14 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. +NULL diff --git a/R/utils.R b/R/utils.R index 2ec90db3..7770aa49 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,32 +9,57 @@ #' @return The original call object with the requested modifications applied #' #' @noRd -#' @examples -#' -#' modify_nested_call(mean(c(1,2,3)) %>% print(), na.rm=TRUE) -modify_nested_call <- function(c, allowable_calls = getNamespaceExports("Tplyr"), ...)
{ - # If the call is not from magrittr, then modify the contents and return the call - if (call_name(c) != "%>%") { - # Only allow the user to use `tplyr` functions - if (!is.null(allowable_calls)) { - assert_that(call_name(c) %in% allowable_calls, msg = "Functions called within `add_layer` must be part of `Tplyr`") - } - c <- call_modify(.call=c, ...) +modify_nested_call <- function(c, examine_only=FALSE, ...) { - } else { - if (!is.null(allowable_calls)) { - # Only allow the user to use `tplyr` functions - assert_that(all(map_chr(call_args(c), call_name) %in% c(allowable_calls, '%>%')), - msg="Functions called within `add_layer` must be part of `Tplyr`") - } + # Get exports from Tplyr + allowable_calls = objects("package:Tplyr") + + # Only allow the user to use `Tplyr` functions + assert_that( + call_name(c) %in% allowable_calls, + msg = "Functions called within `add_layer` must be part of `Tplyr`" + ) + + # Process the magrittr pipe + if (call_name(c) == "%>%") { + # Only allow the user to use `Tplyr` functions on both sides of the pipe + assert_that(all(map_chr(call_args(c), call_name) %in% allowable_calls), + msg="Functions called within `add_layer` must be part of `Tplyr`") # Recursively extract the left side of the magrittr call to work your way up e <- call_standardise(c) - c <- modify_nested_call(call_args(e)$lhs, allowable_calls = allowable_calls, ...) - # Modfify the magittr call by inserting the call retrieved from recursive command back in - c <- call_modify(e, lhs=c) - c + c <- modify_nested_call(call_args(e)$lhs, examine_only, ...) + if (!examine_only) { + # Modify the magittr call by inserting the call retrieved from recursive command back in + c <- call_modify(e, lhs=c) + c + } + } + # Process the 'native' pipe (arguments logically insert as first parameter) + else if (!str_starts(call_name(c), "group_[cds]|use_template")) { + + # Standardize the call to get argument names and pull out the literal first argument + # Save the call to a new variable in the process + e <- call_standardise(c) + args <- call_args(e)[1] + + # Send the first parameter back down recursively through modify_nested_call and + # save it back to the arguments list + c <- modify_nested_call(call_args(c)[[1]], ...) + + if (!examine_only) { + args[[1]] <- c + + # Modify the standardized call with the modified first parameter and send it up + c <- call_modify(e, !!!args) + c + } + } + # If the call is not from magrittr or the pipe, then modify the contents and return the call + else if (!examine_only) { + c <- call_modify(.call=c, ...) } + } #' Find depth of a layer object @@ -55,7 +80,6 @@ depth_from_table <- function(layer, i){ } } - #' Convert a list of quosures to character strings #' #' Intended for use in a tidyselect context. Pivots take arguments as character strings or indices. 
Tidyselect tools return those @@ -90,13 +114,16 @@ match_exact <- function(var_list) { #' #' @param dat A data.frame/tibble to have row labels renamed #' @param by The \code{by} object within a layer +#' @param treat_var treatment variable quosure for use when stats_as_columns is true #' #' @return A tibble with renamed variables and row labels re-ordered to the front of the tibble #' @noRd -replace_by_string_names <- function(dat, by) { +replace_by_string_names <- function(dat, by, treat_var = NULL) { # By must be a list of quosures assert_that(is_quosures(by), msg = "`by` must be a list of quosures") + by <- append(by, treat_var) + # If there were character strings in the by variables then rename them # with an index, starting at 1 for (i in seq_along(by)) { @@ -292,3 +319,20 @@ ut_round <- function(x, n=0) # Return the rounded number return(y) } + +#' Assign a row identifier to a layer +#' +#' To link with the metadata we need an row identifier to link +#' the metadata post sort with built data +#' +#' @param dat Input data that should be ordered identically to the metadata +#' @param layer_type First character of the layer type +#' +#' @return Data with row_id assigned +#' @noRd +assign_row_id <- function(dat, layer_type) { + dat %>% + mutate( + row_id = paste0(layer_type, row_number()) + ) +} diff --git a/R/zzz.R b/R/zzz.R index e68f533c..5a3e6437 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,17 +1,17 @@ -#' @importFrom rlang env enquo enquos caller_env abort inform is_quosure quo_get_expr quo_is_null env_get env_bind env_has quo_is_missing +#' @importFrom rlang env enquo enquos caller_env abort inform is_quosure quo_get_expr quo_is_null env_get env_bind env_has quo_is_missing quos enexprs #' @importFrom rlang call_modify call_standardise call_name call_args is_call current_env quo_name trace_back is_function -#' @importFrom rlang expr exprs enexprs enexpr is_named env_parent env_label is_logical is_empty is_quosures quo_is_symbol sym := as_name -#' @importFrom rlang quos quo env_names env_bind_active as_label eval_tidy warn -#' @importFrom stringr str_split str_extract_all regex str_detect str_replace_all str_replace str_locate_all fixed str_count str_trim +#' @importFrom rlang expr exprs enexprs enexpr is_named env_parent env_label is_logical is_empty is_quosures quo_is_symbol sym syms := as_name +#' @importFrom rlang quos quo env_names env_bind_active as_label eval_tidy warn quo_is_call +#' @importFrom stringr str_split str_extract_all regex str_detect str_replace_all str_replace str_locate_all fixed str_count str_trim str_wrap #' @importFrom purrr flatten map map_lgl pmap_chr imap reduce map_chr map_int map_dbl map_dfr pmap_dfr walk2 map2 map2_dfr walk -#' @importFrom stringr str_sub str_extract str_pad str_starts str_remove_all +#' @importFrom stringr str_sub str_extract str_pad str_starts str_remove_all str_match_all #' @importFrom tidyr pivot_longer pivot_wider replace_na -#' @importFrom magrittr %>% extract +#' @importFrom magrittr %>% extract extract2 #' @importFrom assertthat assert_that #' @importFrom stats IQR median sd quantile var #' @importFrom dplyr n summarize filter vars tally ungroup group_by mutate lag select bind_rows full_join add_tally distinct rowwise #' @importFrom dplyr everything rename mutate_at mutate_all as_tibble bind_cols do case_when arrange left_join row_number between mutate_if -#' @importFrom dplyr across +#' @importFrom dplyr across anti_join n_distinct if_else group_keys cur_group pull #' @importFrom tidyr complete nesting pivot_wider pivot_longer 
replace_na starts_with #' @importFrom utils str head tail #' @importFrom tidyselect all_of vars_select any_of @@ -19,6 +19,7 @@ #' @importFrom lifecycle deprecate_soft deprecate_stop #' @importFrom stats var #' @importFrom forcats fct_expand fct_collapse fct_explicit_na fct_drop +#' @importFrom utils capture.output NULL #' A grammar of summary data for clinical reports @@ -134,19 +135,19 @@ tplyr_default_options <- list( # Desc layer defaults tplyr.desc_layer_default_formats = - list("n" = f_str("xxx", n), - "Mean (SD)"= f_str("a.a+1 (a.a+2)", mean, sd), - "Median" = f_str("a.a+1", median), - "Q1, Q3" = f_str("a.a+1, a.a+1", q1, q3), - "Min, Max" = f_str("a.a, a.a", min, max), - "Missing" = f_str("xxx", missing) + list("n" = f_str("xxx", n), + "Mean (SD)" = f_str("a.a+1 (a.a+2)", mean, sd), + "Median" = f_str("a.a+1", median), + "Q1, Q3" = f_str("a.a+1, a.a+1", q1, q3), + "Min, Max" = f_str("a.a, a.a", min, max), + "Missing" = f_str("xxx", missing) ), # Shift layer defaults tplyr.shift_layer_default_formats = list(f_str("a", n)), # Precision caps for decimal and integer precision - tplyr.precision_cap = c('int' = 99, 'dec'=99), + tplyr.precision_cap = c('int' = 99, 'dec' = 99), # Custom summaries tplyr.custom_summaries = NULL, @@ -158,7 +159,10 @@ tplyr_default_options <- list( tplyr.quantile_type = 7, # Rounding option default - tplyr.IBMRounding = FALSE + tplyr.IBMRounding = FALSE, + + # Layer templates + tplyr.layer_templates = list() ) # Carry out process on load ---- @@ -168,7 +172,7 @@ tplyr_default_options <- list( # Set any options that haven't been set toset <- !(names(tplyr_default_options) %in% names(op)) - if(any(toset)) options(tplyr_default_options[toset]) + if (any(toset)) options(tplyr_default_options[toset]) invisible() } @@ -279,6 +283,23 @@ built_target_pre_where <- NULL count_fmt <- NULL count_missings <- NULL has_missing_count <- FALSE -kept_levels <-expr(TRUE) +kept_levels <- expr(TRUE) levels_to_keep <- NULL break_ties <- NULL +prec_error <- NULL +stats_as_columns <- FALSE +comp_distinct <- NULL +numeric_cutoff <- NULL +numeric_cutoff_stat <- NULL +numeric_cutoff_column <- NULL +meta <- NULL +meta_sum <- NULL +num_sums_raw <- NULL +row_labels <- NULL +row_id <- NULL +USUBJID <- NULL +trans_sums <- NULL +l <- NULL +w <- NULL +s <- NULL +out <- NULL diff --git a/README.Rmd b/README.Rmd index da597b03..f4863985 100644 --- a/README.Rmd +++ b/README.Rmd @@ -24,14 +24,15 @@ load("vignettes/adsl.Rdata") # Tplyr -[](https://ostinclinicalresearch.slack.com) +[](https://pharmaverse.org) [](https://RValidationHub.slack.com) [![R build status](https://github.com/atorus-research/tplyr/workflows/R-CMD-check/badge.svg)](https://github.com/atorus-research/tplyr/actions?workflow=R-CMD-check) [](https://app.codecov.io/gh/atorus-research/tplyr) [](https://github.com/atorus-research/Tplyr/blob/master/LICENSE) -[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental-1) +[![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) + -Welcome to Tplyr! Tplyr is a grammar of data format and summary. It's designed to simplify the creation of clinical safety summaries and help you focus on how you present your data rather than redundant summaries being performed. +Welcome to Tplyr! Tplyr is a traceability minded grammar of data format and summary. 
It's designed to simplify the creation of common clinical summaries and help you focus on how you present your data rather than redundant summaries being performed. Furthermore, for every result Tplyr produces, it also produces the metadata necessary to give your traceability from source to summary. As always, we welcome your feedback. If you spot a bug, would like to see a new feature, or if any documentation is unclear - submit an issue through GitHub right [here](https://github.com/atorus-research/Tplyr/issues). @@ -46,20 +47,20 @@ You can Tplyr install with: install.packages("Tplyr") # Or install the development version: -devtools::install_github("https://github.com/atorus-research/Tplyr.git") +devtools::install_github("https://github.com/atorus-research/Tplyr.git", ref="devel") ``` # What is Tplyr? [dplyr](https://dplyr.tidyverse.org/) from tidyverse is a grammar of data manipulation. So what does that allow you to do? It gives you, as a data analyst, the capability to easily and intuitively approach the problem of manipulating your data into an analysis ready form. `dplyr` conceptually breaks things down into verbs that allow you to focus on _what_ you want to do more than _how_ you have to do it. -`Tplyr` is designed around a similar concept, but its focus is on building summary tables within the clinical world. In the pharmaceutical industry, a great deal of the data presented in the outputs we create are very similar. For the most part, most of these tables can be broken down into a few categories: +`Tplyr` is designed around a similar concept, but its focus is on building summary tables common within the clinical world. In the pharmaceutical industry, a great deal of the data presented in the outputs we create are very similar. For the most part, most of these tables can be broken down into a few categories: - Counting for event based variables or categories - Shifting, which is just counting a change in state with a 'from' and a 'to' - Generating descriptive statistics around some continuous variable. -For many of the tables that go into a clinical submission, at least when considering safety outputs, the tables are made up of a combination of these approaches. Consider a demographics table - and let's use an example from the PHUSE project Standard Analyses & Code Sharing - [Analyses & Displays Associated with Demographics, Disposition, and Medications in Phase 2-4 Clinical Trials and Integrated Summary Documents](https://phuse.s3.eu-central-1.amazonaws.com/Deliverables/Standard+Analyses+and+Code+Sharing/Analyses+%26+Displays+Associated+with+Demographics,+Disposition+and+Medication+in+Phase+2-4+Clinical+Trials+and+Integrated+Summary+Documents.pdf). +For many of the tables that go into a clinical submission, the tables are made up of a combination of these approaches. Consider a demographics table - and let's use an example from the PHUSE project Standard Analyses & Code Sharing - [Analyses & Displays Associated with Demographics, Disposition, and Medications in Phase 2-4 Clinical Trials and Integrated Summary Documents](https://phuse.s3.eu-central-1.amazonaws.com/Deliverables/Standard+Analyses+and+Code+Sharing/Analyses+%26+Displays+Associated+with+Demographics,+Disposition+and+Medication+in+Phase+2-4+Clinical+Trials+and+Integrated+Summary+Documents.pdf).
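To make those categories concrete, here is a minimal sketch of how they map onto Tplyr layers. It reuses the CDISC pilot `adsl` data and variables (`TRT01P`, `AGE`, `AGEGR1`, `SAFFL`) that appear elsewhere in this README; treat it as an illustration rather than part of the shipped example.

```r
library(Tplyr)
library(dplyr)

tplyr_table(adsl, TRT01P, where = SAFFL == "Y") %>%
  add_layer(
    # Descriptive statistics layer
    group_desc(AGE, by = "Age (years)")
  ) %>%
  add_layer(
    # Count layer
    group_count(AGEGR1, by = "Age categories")
  ) %>%
  build()
```

Shift layers follow the same pattern through `group_shift()`.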

@@ -93,7 +94,7 @@ tplyr_table(adsl, TRT01P, where = SAFFL == "Y") %>% ## 'Tplyr' is Qualified -We understand how important documentation and testing is within the pharmaceutical world. This is why outside of unit testing 'Tplyr includes an entire user-acceptable testing document, where requirements were established, test-cases were written, and tests were independently programmed and executed. We do this in the hope that you can leverage our work within a qualified programming environment, and that we save you a substantial amount of trouble in getting it there. +We understand how important documentation and testing is within the pharmaceutical world. This is why outside of unit testing 'Tplyr includes an entire user-acceptance testing document, where requirements were established, test-cases were written, and tests were independently programmed and executed. We do this in the hope that you can leverage our work within a qualified programming environment, and that we save you a substantial amount of trouble in getting it there. You can find the qualification document within this repository right [here](https://github.com/atorus-research/Tplyr/blob/master/uat/references/output/uat.pdf). The 'uat' folder additionally contains all of the raw files, programmatic tests, specifications, and test cases necessary to create this report. @@ -120,6 +121,11 @@ There's quite a bit more to learn! And we've prepared a number of other vignette - Learn more about using 'Tplyr' options in `vignette("options")` - And finally, learn more about producing and outputting styled tables using 'Tplyr' in `vignette("styled-table")` +In the Tplyr version 1.0.0, we've packed a number of new features in. For deeper dives on the largest new additions: + +- Learn about Tplyr's traceability metadata in `vignette("metadata")` and about how it can be extended in `vigentte("custom-metadata")` +- Learn about layer templates in `vignette("layer_templates")` + # References In building 'Tplyr', we needed some additional resources in addition to our personal experience to help guide design. PHUSE has done some great work to create guidance for standard outputs with collaboration between multiple pharmaceutical companies and the FDA. You can find some of the resource that we referenced below. diff --git a/README.md b/README.md index 0546b151..318bc427 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,22 @@ -[](https://ostinclinicalresearch.slack.com) +[](https://pharmaverse.org) [](https://RValidationHub.slack.com) [![R build status](https://github.com/atorus-research/tplyr/workflows/R-CMD-check/badge.svg)](https://github.com/atorus-research/tplyr/actions?workflow=R-CMD-check) [](https://app.codecov.io/gh/atorus-research/tplyr) [](https://github.com/atorus-research/Tplyr/blob/master/LICENSE) [![Lifecycle: -experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental-1) +stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) + -Welcome to Tplyr! Tplyr is a grammar of data format and summary. It’s -designed to simplify the creation of clinical safety summaries and help -you focus on how you present your data rather than redundant summaries -being performed. +Welcome to Tplyr! Tplyr is a traceability minded grammar of data format +and summary. 
It’s designed to simplify the creation of common clinical +summaries and help you focus on how you present your data rather than +redundant summaries being performed. Furthermore, for every result Tplyr +produces, it also produces the metadata necessary to give your +traceability from source to summary. As always, we welcome your feedback. If you spot a bug, would like to see a new feature, or if any documentation is unclear - submit an issue @@ -36,7 +39,7 @@ You can Tplyr install with: install.packages("Tplyr") # Or install the development version: -devtools::install_github("https://github.com/atorus-research/Tplyr.git") +devtools::install_github("https://github.com/atorus-research/Tplyr.git", ref="devel") ``` # What is Tplyr? @@ -49,22 +52,22 @@ conceptually breaks things down into verbs that allow you to focus on *what* you want to do more than *how* you have to do it. `Tplyr` is designed around a similar concept, but its focus is on -building summary tables within the clinical world. In the pharmaceutical -industry, a great deal of the data presented in the outputs we create -are very similar. For the most part, most of these tables can be broken -down into a few categories: +building summary tables common within the clinical world. In the +pharmaceutical industry, a great deal of the data presented in the +outputs we create are very similar. For the most part, most of these +tables can be broken down into a few categories: - Counting for event based variables or categories - Shifting, which is just counting a change in state with a ‘from’ and a ‘to’ - Generating descriptive statistics around some continuous variable. -For many of the tables that go into a clinical submission, at least when -considering safety outputs, the tables are made up of a combination of -these approaches. Consider a demographics table - and let’s use an -example from the PHUSE project Standard Analyses & Code Sharing - -[Analyses & Displays Associated with Demographics, Disposition, and -Medications in Phase 2-4 Clinical Trials and Integrated Summary +For many of the tables that go into a clinical submission, the tables +are made up of a combination of these approaches. Consider a +demographics table - and let’s use an example from the PHUSE project +Standard Analyses & Code Sharing - [Analyses & Displays Associated with +Demographics, Disposition, and Medications in Phase 2-4 Clinical Trials +and Integrated Summary Documents](https://phuse.s3.eu-central-1.amazonaws.com/Deliverables/Standard+Analyses+and+Code+Sharing/Analyses+%26+Displays+Associated+with+Demographics,+Disposition+and+Medication+in+Phase+2-4+Clinical+Trials+and+Integrated+Summary+Documents.pdf).

@@ -105,6 +108,7 @@ Test Data Factory data [here](https://github.com/atorus-research/CDISC_pilot_replication). ``` r + tplyr_table(adsl, TRT01P, where = SAFFL == "Y") %>% add_layer( group_desc(AGE, by = "Age (years)") @@ -116,23 +120,23 @@ tplyr_table(adsl, TRT01P, where = SAFFL == "Y") %>% kable() ``` -| row\_label1 | row\_label2 | var1\_Placebo | var1\_Xanomeline High Dose | var1\_Xanomeline Low Dose | ord\_layer\_index | ord\_layer\_1 | ord\_layer\_2 | -|:---------------------|:------------|:--------------|:---------------------------|:--------------------------|------------------:|--------------:|--------------:| -| Age (years) | n | 86 | 84 | 84 | 1 | 1 | 1 | -| Age (years) | Mean (SD) | 75.2 ( 8.59) | 74.4 ( 7.89) | 75.7 ( 8.29) | 1 | 1 | 2 | -| Age (years) | Median | 76.0 | 76.0 | 77.5 | 1 | 1 | 3 | -| Age (years) | Q1, Q3 | 69.2, 81.8 | 70.8, 80.0 | 71.0, 82.0 | 1 | 1 | 4 | -| Age (years) | Min, Max | 52, 89 | 56, 88 | 51, 88 | 1 | 1 | 5 | -| Age (years) | Missing | 0 | 0 | 0 | 1 | 1 | 6 | -| Age Categories n (%) | <65 | 14 ( 16.3%) | 11 ( 13.1%) | 8 ( 9.5%) | 2 | 1 | 1 | -| Age Categories n (%) | >80 | 30 ( 34.9%) | 18 ( 21.4%) | 29 ( 34.5%) | 2 | 1 | 2 | -| Age Categories n (%) | 65-80 | 42 ( 48.8%) | 55 ( 65.5%) | 47 ( 56.0%) | 2 | 1 | 3 | +| row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2 | +|:---------------------|:-----------|:-------------|:--------------------------|:-------------------------|----------------:|------------:|------------:| +| Age (years) | n | 86 | 84 | 84 | 1 | 1 | 1 | +| Age (years) | Mean (SD) | 75.2 ( 8.59) | 74.4 ( 7.89) | 75.7 ( 8.29) | 1 | 1 | 2 | +| Age (years) | Median | 76.0 | 76.0 | 77.5 | 1 | 1 | 3 | +| Age (years) | Q1, Q3 | 69.2, 81.8 | 70.8, 80.0 | 71.0, 82.0 | 1 | 1 | 4 | +| Age (years) | Min, Max | 52, 89 | 56, 88 | 51, 88 | 1 | 1 | 5 | +| Age (years) | Missing | 0 | 0 | 0 | 1 | 1 | 6 | +| Age Categories n (%) | \<65 | 14 ( 16.3%) | 11 ( 13.1%) | 8 ( 9.5%) | 2 | 1 | 1 | +| Age Categories n (%) | \>80 | 30 ( 34.9%) | 18 ( 21.4%) | 29 ( 34.5%) | 2 | 1 | 2 | +| Age Categories n (%) | 65-80 | 42 ( 48.8%) | 55 ( 65.5%) | 47 ( 56.0%) | 2 | 1 | 3 | ## ‘Tplyr’ is Qualified We understand how important documentation and testing is within the pharmaceutical world. This is why outside of unit testing ’Tplyr -includes an entire user-acceptable testing document, where requirements +includes an entire user-acceptance testing document, where requirements were established, test-cases were written, and tests were independently programmed and executed. We do this in the hope that you can leverage our work within a qualified programming environment, and that we save @@ -173,6 +177,13 @@ vignettes to help you get what you need out of ‘Tplyr’. - And finally, learn more about producing and outputting styled tables using ‘Tplyr’ in `vignette("styled-table")` +In the Tplyr version 1.0.0, we’ve packed a number of new features in. 
+For deeper dives on the largest new additions: + +- Learn about Tplyr’s traceability metadata in `vignette("metadata")` + and about how it can be extended in `vigentte("custom-metadata")` +- Learn about layer templates in `vignette("layer_templates")` + # References In building ‘Tplyr’, we needed some additional resources in addition to diff --git a/_pkgdown.yml b/_pkgdown.yml index 68203a4e..58f54aee 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,6 +1,12 @@ +url: https://atorus-research.github.io/Tplyr destination: docs template: + bootstrap: 5 + bslib: + bg: "#FFFFFF" + fg: "#1D252D" + primary: "#1D252D" params: bootswatch: yeti ganalytics: UA-165685385-2 @@ -11,7 +17,16 @@ template: twitter: creator: "@AtorusR" card: summary_large_image - +search: + exclude: ['news/index.html'] +repo: + url: + home: https://github.com/atorus-research/Tplyr/ + source: https://github.com/atorus-research/Tplyr/blob/main/ + issue: https://github.com/atorus-research/Tplyr/issues/ + user: https://github.com/atorus-research +news: + cran_dates: true reference: - title: Build desc: High-level functions to create and build a table @@ -31,7 +46,7 @@ reference: desc: Customizing the display as a table - contents: - f_str - - ends_with("formats") + - ends_with("layer_formats") - set_format_strings - set_missing_count - title: Sorting @@ -42,17 +57,22 @@ reference: - set_result_order_var - set_outer_sort_position - title: Adding Groups and Stats + desc: Adding treatment groups, total rows, and risk difference - contents: - add_total_row - add_total_group - add_risk_diff - set_total_row_label -- title: Summary Functions +- title: Descriptive Statistics Layer Functions + desc: Descriptive statistics layer helper functions - contents: - set_custom_summaries + - set_precision_data - set_precision_by - set_precision_on + - set_stats_as_columns - title: Counting functions + desc: Count layer helper functions - contents: - set_denoms_by - set_distinct_by @@ -63,21 +83,35 @@ reference: - keep_levels - set_denom_ignore - set_indentation + - set_numeric_threshold - title: Column Headers + desc: Column header helpers - contents: - add_column_headers - header_n +- title: Metadata Functions + desc: Tplyr metadata functions +- contents: + - tplyr_meta + - add_variables + - add_filters + - get_metadata + - append_metadata + - starts_with('get_meta') - title: Helper functions + desc: General helper functions - contents: + - apply_formats + - str_indent_wrap - apply_row_masks - get_numeric_data - get_stats_data - - by - get_by - get_target_var - treat_var - get_where.tplyr_layer - Tplyr + - new_layer_template articles: - title: Table Vignettes @@ -96,7 +130,13 @@ articles: - riskdiff - sort - options + - layer_templates - styled-table - denom - Tplyr +- title: Using Metadata + navbar: Metadata + contents: + - metadata + - custom-metadata diff --git a/docs/404.html b/docs/404.html index c72b3493..c505ccf7 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,215 +1,134 @@ - - - - + + + + - - + Page not found (404) • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - - - - - - - - -

-
-
+
+ + +
-
- - - - +
- -
- +
+
+ - - diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html index 977098e9..5aa19d99 100644 --- a/docs/CONTRIBUTING.html +++ b/docs/CONTRIBUTING.html @@ -1,296 +1,167 @@ - - - - - - - -Contributor Covenant Code of Conduct • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Contributor Covenant Code of Conduct • Tplyr - - - - - + + Skip to contents + +
-
- -
- +
+ - - - + diff --git a/docs/ISSUE_TEMPLATE.html b/docs/ISSUE_TEMPLATE.html index 469c3e8b..8ddefd8d 100644 --- a/docs/ISSUE_TEMPLATE.html +++ b/docs/ISSUE_TEMPLATE.html @@ -1,246 +1,125 @@ - - - - - - - -NA • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NA • Tplyr - - - - - - - - - - -
-
-
+
+ +
+
-
- -
- +
+ - - - + diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 63592584..4b29c544 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -1,217 +1,102 @@ - - - - - - - -License • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -License • Tplyr - - - - - - - - - - -
-
-
+
+ +
+
-
- +
- +
- - -
- - -
-

Site built with pkgdown 1.6.1.

+ -
-
+
+ - - - + diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 8e926057..f7ae796b 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -1,183 +1,80 @@ - - - - - - - -The MIT License (MIT) • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -The MIT License (MIT) • Tplyr - - - - - - - - - - -
-
-
+
+ +
+
-
- - - - -
- +
-
- +
+ - - - + diff --git a/docs/PULL_REQUEST_TEMPLATE.html b/docs/PULL_REQUEST_TEMPLATE.html index 0185e96d..469467c0 100644 --- a/docs/PULL_REQUEST_TEMPLATE.html +++ b/docs/PULL_REQUEST_TEMPLATE.html @@ -1,191 +1,86 @@ - - - - - - - -NA • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NA • Tplyr - - - - - - - - - - -
-
-
+
+ +
+
-
- - - -
+
- -
- +
+ - - - + diff --git a/docs/articles/Tplyr.html b/docs/articles/Tplyr.html index 2cd1e83b..c2ec206f 100644 --- a/docs/articles/Tplyr.html +++ b/docs/articles/Tplyr.html @@ -4,7 +4,8 @@ - + + Getting Started • Tplyr @@ -12,14 +13,13 @@ - - - + + + - - + - + @@ -35,269 +35,347 @@ gtag('config', 'UA-165685385-2'); - -
-
-
- @@ -874,15 +1028,24 @@

1
-

In set_custom_summaries(), first you name the summary being performed. This is important - that name is what you use in the f_str() call to incorporate it into a format. Next, you program or call the function desired. What happens in the background is that this is used in a call to dplyr::summarize() - so use similar syntax. Use the variable name .var in your custom summary function. This is necessary because it allows a generic variable name to be used when multiple target variables are specified - and therefore the function can be applied to both target variables.

-

Sometimes there’s a need to present multiple variables summarized side by side. ‘Tplyr’ allows you to do this as well.

+

In set_custom_summaries(), first you name the summary +being performed. This is important - that name is what you use in the +f_str() call to incorporate it into a format. Next, you +program or call the function desired. What happens in the background is +that this is used in a call to dplyr::summarize() - so use +similar syntax. Use the variable name .var in your custom +summary function. This is necessary because it allows a generic variable +name to be used when multiple target variables are specified - and +therefore the function can be applied to both target variables.
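As a short sketch of those mechanics (the geometric mean here is purely illustrative, and the example assumes the same `adsl` data used above):

```r
tplyr_table(adsl, TRT01P) %>%
  add_layer(
    group_desc(AGE, by = "Age (years)") %>%
      set_custom_summaries(
        # .var stands in for the layer's target variable (AGE here)
        geometric_mean = exp(sum(log(.var[.var > 0]), na.rm = TRUE) / length(.var))
      ) %>%
      set_format_strings(
        "Geometric Mean" = f_str("xx.xx", geometric_mean)
      )
  ) %>%
  build() %>%
  kable()
```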

+

Sometimes there’s a need to present multiple variables summarized +side by side. ‘Tplyr’ allows you to do this as well.

-tplyr_table(adsl, TRT01P) %>% 
-  add_layer(
-    group_desc(vars(AGE, AVGDD), by = "Age and Avg. Daily Dose")
-  ) %>% 
-  build() %>% 
-  kable()
+tplyr_table(adsl, TRT01P) %>% + add_layer( + group_desc(vars(AGE, AVGDD), by = "Age and Avg. Daily Dose") + ) %>% + build() %>% + kable()
@@ -991,20 +1154,29 @@

-

‘Tplyr’ summarizes both variables and merges them together. This makes creating tables where you need to compare BASE, AVAL, and CHG next to each other nice and simple. Note the use of dplyr::vars() - in any situation where you’d like to use multiple variable names in a parameter, use dplyr::vars() to specify the variables. You can use text strings in the calls to dplyr::vars() as well.

+

‘Tplyr’ summarizes both variables and merges them together. This +makes creating tables where you need to compare BASE, AVAL, and CHG next +to each other nice and simple. Note the use of +dplyr::vars() - in any situation where you’d like to use +multiple variable names in a parameter, use dplyr::vars() +to specify the variables. You can use text strings in the calls to +dplyr::vars() as well.
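For example, here is a sketch of summarizing analysis value and change from baseline side by side; it assumes `AVAL` and `CHG` are available on the `adlb` data used later in this vignette:

```r
tplyr_table(adlb, TRTA, where = PARAMCD == "CK") %>%
  add_layer(
    group_desc(vars(AVAL, CHG), by = PARAM)
  ) %>%
  build() %>%
  kable()
```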

-
-

-Count Layers

-

Count layers generally allow you to create “n” and “n (%)” count type summaries. There are a few extra features here as well. Let’s say that you want a total row within your counts. This can be done with add_total_row():

+
+

Count Layers +

+

Count layers generally allow you to create “n” and “n (%)” count type +summaries. There are a few extra features here as well. Let’s say that +you want a total row within your counts. This can be done with +add_total_row():

-tplyr_table(adsl, TRT01P) %>% 
-  add_layer(
-    group_count(AGEGR1, by = "Age categories") %>% 
-      add_total_row()
-  ) %>% 
-  build() %>% 
-  kable()
+tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(AGEGR1, by = "Age categories") %>% + add_total_row() + ) %>% + build() %>% + kable()
@@ -1069,16 +1241,18 @@

-

Sometimes it’s also necessary to count summaries based on distinct values. ‘Tplyr’ allows you to do this as well with set_distinct_by():

+

Sometimes it’s also necessary to count summaries based on distinct +values. ‘Tplyr’ allows you to do this as well with +set_distinct_by():

-tplyr_table(adae, TRTA) %>% 
-  add_layer(
-    group_count('Subjects with at least one adverse event') %>% 
-      set_distinct_by(USUBJID) %>% 
-      set_format_strings(f_str('xx', n))
-  ) %>% 
-  build() %>% 
-  kable()
+tplyr_table(adae, TRTA) %>% + add_layer( + group_count('Subjects with at least one adverse event') %>% + set_distinct_by(USUBJID) %>% + set_format_strings(f_str('xx', n)) + ) %>% + build() %>% + kable()
@@ -1105,17 +1279,22 @@

NA
-

There’s another trick going on here - to create a summary with row label text like you see above, text strings can be used as the target variables. Here, we use this in combination with set_distinct_by() to count distinct subjects.

-

Adverse event tables often call for counting AEs of something like a body system and counting actual events within that body system. ‘Tplyr’ has means of making this simple for the user as well.

+

There’s another trick going on here - to create a summary with row +label text like you see above, text strings can be used as the target +variables. Here, we use this in combination with +set_distinct_by() to count distinct subjects.

+

Adverse event tables often call for counting AEs of something like a +body system and counting actual events within that body system. ‘Tplyr’ +has means of making this simple for the user as well.

-tplyr_table(adae, TRTA) %>% 
-  add_layer(
-    group_count(vars(AEBODSYS, AEDECOD))
-  ) %>% 
-  build() %>% 
-  head() %>% 
-  kable()
- +tplyr_table(adae, TRTA) %>% + add_layer( + group_count(vars(AEBODSYS, AEDECOD)) + ) %>% + build() %>% + head() %>% + kable() +
@@ -1199,25 +1378,36 @@

-

Here we again use dplyr::vars() to specify multiple target variables. When used in a count layer, ‘Tplyr’ knows automatically that the first variable is a grouping variable for the second variable, and counts shall be produced for both then merged together.

+

Here we again use dplyr::vars() to specify multiple +target variables. When used in a count layer, ‘Tplyr’ knows +automatically that the first variable is a grouping variable for the +second variable, and counts shall be produced for both then merged +together.

-
-

-Shift Layers

-

Lastly, let’s talk about shift layers. A common example of this would be looking at a subject’s lab levels at baseline versus some designated evaluation point. This would tell us, for example, how many subjects were high at baseline for a lab test vs. after an intervention has been introduced. The shift layer in ‘Tplyr’ is intended for creating shift tables that show these data as a matrix, where one state will be presented in rows and the other in columns. Let’s look at an example.

+
+

Shift Layers +

+

Lastly, let’s talk about shift layers. A common example of this would +be looking at a subject’s lab levels at baseline versus some designated +evaluation point. This would tell us, for example, how many subjects +were high at baseline for a lab test vs. after an intervention has been +introduced. The shift layer in ‘Tplyr’ is intended for creating shift +tables that show these data as a matrix, where one state will be +presented in rows and the other in columns. Let’s look at an +example.

-# Tplyr can use factor orders to dummy values and order presentation
-adlb$ANRIND <- factor(adlb$ANRIND, c("L", "N", "H"))
-adlb$BNRIND <- factor(adlb$BNRIND, c("L", "N", "H"))
-
-tplyr_table(adlb, TRTA, where = PARAMCD == "CK") %>%
-  add_layer(
-    group_shift(vars(row=BNRIND, column=ANRIND), by=PARAM) %>% 
-      set_format_strings(f_str("xx (xxx%)", n, pct))
-  ) %>% 
-  build() %>% 
-  kable()
- +# Tplyr can use factor orders to dummy values and order presentation +adlb$ANRIND <- factor(adlb$ANRIND, c("L", "N", "H")) +adlb$BNRIND <- factor(adlb$BNRIND, c("L", "N", "H")) + +tplyr_table(adlb, TRTA, where = PARAMCD == "CK") %>% + add_layer( + group_shift(vars(row=BNRIND, column=ANRIND), by=PARAM) %>% + set_format_strings(f_str("xx (xxx%)", n, pct)) + ) %>% + build() %>% + kable() +
@@ -1301,65 +1491,84 @@

-

The underlying process of shift tables is the same as count layers - we’re counting the number of occurrences of something by a set of grouping variables. This differs in that ‘Tplyr’ uses the group_shift() API to use the same basic interface as other tables, but translate your target variables into the row variable and the column variable. Furthermore, there is some enhanced control over how denominators should behave that is necessary for a shift layer.

+

The underlying process of shift tables is the same as count layers - +we’re counting the number of occurrences of something by a set of +grouping variables. This differs in that ‘Tplyr’ uses the +group_shift() API to use the same basic interface as other +tables, but translate your target variables into the row variable and +the column variable. Furthermore, there is some enhanced control over +how denominators should behave that is necessary for a shift layer.
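As a sketch of that denominator control, `set_denoms_by()` can also be applied to a shift layer to choose which grouping variables the percentages are computed within (reusing the example above; the choice of `TRTA` and `PARAM` is illustrative):

```r
tplyr_table(adlb, TRTA, where = PARAMCD == "CK") %>%
  add_layer(
    group_shift(vars(row = BNRIND, column = ANRIND), by = PARAM) %>%
      set_format_strings(f_str("xx (xxx%)", n, pct)) %>%
      # Denominators are now totals within each treatment group and parameter
      set_denoms_by(TRTA, PARAM)
  ) %>%
  build() %>%
  kable()
```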

-
-

-Where to go from here?

-

There’s quite a bit more to learn! And we’ve prepared a number of other vignettes to help you get what you need out of ‘Tplyr’.

+
+

Where to go from here? +

+

There’s quite a bit more to learn! And we’ve prepared a number of +other vignettes to help you get what you need out of ‘Tplyr’.

-
-

-References

-

In building ‘Tplyr’, we needed some additional resources beyond our personal experience to help guide design. PHUSE has done some great work to create guidance for standard outputs with collaboration between multiple pharmaceutical companies and the FDA. You can find some of the resources that we referenced below.

-

Analysis and Displays Associated with Adverse Events

-

Analyses and Displays Associated with Demographics, Disposition, and Medications

-

Analyses and Displays Associated with Measures of Central Tendency

-
-
- - -
+ diff --git a/docs/articles/count.html b/docs/articles/count.html index 11ff4430..18646917 100644 --- a/docs/articles/count.html +++ b/docs/articles/count.html @@ -4,7 +4,8 @@ - + + Count Layers • Tplyr @@ -12,14 +13,13 @@ - - - + + + - - + - + @@ -35,133 +35,119 @@ gtag('config', 'UA-165685385-2'); - -
-
-
- + diff --git a/docs/articles/custom-metadata.html b/docs/articles/custom-metadata.html new file mode 100644 index 00000000..681b8d9d --- /dev/null +++ b/docs/articles/custom-metadata.html @@ -0,0 +1,759 @@ + + + + + + + + +Creating Custom Tplyr Metadata • Tplyr + + + + + + + + + + + + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + +

As covered in vignette('metadata'), Tplyr can produce +metadata for any result that it calculates. But what about data that +Tplyr can’t produce, such as efficacy results or some sort of custom +analysis? You may still want that drill down capability either on your +own or paired with an existing Tplyr table.

+

Take for instance Table 14-3.01 from the CDISC +Pilot. Skipping the actual construction of the table, here’s the +output data from Tplyr and some manual calculation:

+
+kable(full_data)
+ ++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
row_idrow_label1row_label2var1_Placebovar1_Xanomeline Low Dosevar1_Xanomeline High Dose
d1_1Baselinen798174
d2_1Mean (SD)24.1 (12.19)24.4 (12.92)21.3 (11.74)
d3_1Median (Range)21.0 ( 5;61)21.0 ( 5;57)18.0 ( 3;57)
d1_2Week 24n798174
d2_2Mean (SD)26.7 (13.79)26.4 (13.18)22.8 (12.48)
d3_2Median (Range)24.0 ( 5;62)25.0 ( 6;62)20.0 ( 3;62)
d1_3Change from Baselinen798174
d2_3Mean (SD)2.5 ( 5.80)2.0 ( 5.55)1.5 ( 4.26)
d3_3Median (Range)2.0 (-11;16)2.0 (-11;17)1.0 ( -7;13)
x4_1p-value(Dose Response) [1][2]0.245
x4_2
x4_3p-value(Xan - Placebo) [1][3]0.5690.233
x4_4Diff of LS Means (SE)-0.5 (0.82)-1.0 (0.84)
x4_595% CI(-2.1;1.1)(-2.7;0.7)
x4_6
x4_7p-value(Xan High - Xan Low) [1][3]0.520
x4_8Diff of LS Means (SE)-0.5 (0.84)
x4_995% CI(-2.2;1.1)
+

This is the primary efficacy table from the trial. The top portion of this table is fairly straightforward with Tplyr and can be done using descriptive statistic layers. Once you hit the p-values on the lower half, this goes beyond Tplyr’s remit. To produce the table, you can combine Tplyr output with a separate data frame analyzed and formatted yourself (though note you can still get some help from Tplyr tools like apply_formats(), as sketched below).
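If you do format that lower half yourself, a helper like apply_formats() lets you reuse Tplyr’s format-string engine on your own numbers. A rough sketch - the values below are hypothetical placeholders, not the actual analysis results:

# Hypothetical contrast estimates - placeholders only, not the real efficacy analysis
contrast <- tibble::tribble(
  ~row_label1,                ~diff, ~se,
  "   Diff of LS Means (SE)", -0.5,  0.82
)

contrast %>%
  mutate(`var1_Xanomeline Low Dose` = apply_formats("xx.x (xx.xx)", diff, se)) %>%
  select(row_label1, `var1_Xanomeline Low Dose`)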

+

But what about the metadata? How do you get the drill down +capabilities for that lower half of the table? We’ve provided a couple +additional tools in Tplyr to allow you to construct your own metadata +and append existing metadata present in a Tplyr table.

+
+

Build a tplyr_meta object +

+

As covered in vignette('metadata'), a +tplyr_meta object consists of two different fields: A list +of variable names, and a list of filter conditions. You provide both of +these fields as a list of quosures:

+
+m <- tplyr_meta(
+  names = quos(a, b, c),
+  filters = quos(a==1, b==2, c==3)
+)
+m
+#> tplyr_meta: 3 names, 3 filters
+#> Names:
+#>     a, b, c 
+#> Filters:
+#>     a == 1, b == 2, c == 3
+

The tplyr_meta() function can take these fields immediately upon creation. If you need to dynamically create a tplyr_meta object (such as how Tplyr constructs the objects internally), the functions add_variables() and add_filters() are available to extend an existing tplyr_meta object:

+
+m <- m %>% 
+  add_variables(quos(x)) %>% 
+  add_filters(quos(x == 'a'))
+
+m
+#> tplyr_meta: 4 names, 4 filters
+#> Names:
+#>     a, b, c, x 
+#> Filters:
+#>     a == 1, b == 2, c == 3, x == "a"
+
+
+

Building your own metadata table +

+

Now that we can create our own tplyr_meta objects, let’s +assemble the metadata for the bottom portion of Table 14-3.01:

+
+# Overall model subset of data
+meta <- tplyr_meta(
+  names = quos(TRTP, EFFFL, ITTFL, ANL01FL, SITEGR1, AVISIT, AVISITN, PARAMCD, AVAL, BASE, CHG),
+  filters = quos(EFFFL == "Y", ITTFL == "Y", PARAMCD == "ACTOT", ANL01FL == "Y", AVISITN == 24)
+)
+
+# Xan High / Placebo contrast
+meta_xhp <- meta %>% 
+  add_filters(quos(TRTP %in% c("Xanomeline High Dose", "Placebo")))
+
+# Xan Low / Placebo Contrast
+meta_xlp <- meta %>% 
+  add_filters(quos(TRTP %in% c("Xanomeline Low Dose", "Placebo")))
+
+# Xan High / Xan Low Contrast
+meta_xlh <- meta %>% 
+  add_filters(quos(TRTP %in% c("Xanomeline High Dose", "Xanomeline Low Dose")))
+
+eff_meta <- tibble::tribble(
+  ~"row_id",  ~"row_label1",                       ~"var1_Xanomeline Low Dose", ~"var1_Xanomeline High Dose",
+  "x4_1",    "p-value(Dose Response) [1][2]",      NULL,                        meta,
+  "x4_3",    "p-value(Xan - Placebo) [1][3]",        meta_xlp,                    meta_xhp,
+  "x4_4",    "   Diff of LS Means (SE)",           meta_xlp,                    meta_xhp,
+  "x4_5",    "   95% CI",                          meta_xlp,                    meta_xhp,
+  "x4_7",    "p-value(Xan High - Xan Low) [1][3]", NULL,                        meta_xlh,
+  "x4_8",    "   Diff of LS Means (SE)",           NULL,                        meta_xlh,
+  "x4_9",    "   95% CI",                          NULL,                        meta_xlh
+)
+

Let’s break down what happened here:

+
    +
  • First, we assemble the overarching metadata object for the model. A lot of this metadata is shared across each of the different result cells for all of the efficacy data, so we can start by collecting this information into a tplyr_meta object.
  • +
  • Next, we can use that starting point to build +tplyr_meta objects for the other result cells. The model +data contains contrasts of each of the different treatment group +comparisons. By using add_filters(), we can create those +additional three tplyr_meta objects using the starting +point and attaching an additional filter condition.
  • +
  • Lastly, to extend the metadata in the original +tplyr_table object that created the summary portion of this +table, we need a data frame. There’s a lot of ways to do this, but I +like the display and explicitness of +tibble::tribble().
  • +
+

When building a data frame for use with tplyr_table +metadata, there are really only two rules:

+
    +
  • You need a column in the data frame called row_id +
  • +
  • The row_id values cannot be duplicates of any other +value within the existing metadata.
  • +
+

The row_id values built by Tplyr always follow a consistent format: a letter identifying the layer type - “c”, “d”, or “s” for count, desc, and shift layers - followed by the layer number (i.e. the order in which the layer was inserted into the Tplyr table), an underscore, and finally the row of that layer within the output. For example, the third row of a count layer that was the second layer in the table would have a row_id of “c2_3”. In this example, I chose “x4_n” as the format, with the “x” symbolizing custom, and these data can be thought of as the fourth layer. That said, these values would typically be masked from the viewer of the table, so they really just need to be unique - you can choose whatever you want.

+
+
+

Appending Existing Tplyr Metadata +

+

Now that we’ve created our custom extension of the Tplyr metadata, +let’s extend the existing data frame. To do this, Tplyr has the function +append_metadata():

+
+t <- append_metadata(t, eff_meta)
+

Behind the scenes, this function simply binds the new metadata with the old in the proper section of the tplyr_table object. You can view the tplyr_table metadata with the function get_metadata():

+
+get_metadata(t)
+#> # A tibble: 16 × 6
+#>    row_id row_label1   row_label2 var1_Placebo `var1_Xanomeli…` `var1_Xanomeli…`
+#>    <chr>  <chr>        <chr>      <list>       <list>           <list>          
+#>  1 d1_1   "Baseline"   n          <tplyr_mt>   <tplyr_mt>       <tplyr_mt>      
+#>  2 d2_1   "Baseline"   Mean (SD)  <tplyr_mt>   <tplyr_mt>       <tplyr_mt>      
+#>  3 d3_1   "Baseline"   Median (R… <tplyr_mt>   <tplyr_mt>       <tplyr_mt>      
+#>  4 d1_2   "Week 24"    n          <tplyr_mt>   <tplyr_mt>       <tplyr_mt>      
+#>  5 d2_2   "Week 24"    Mean (SD)  <tplyr_mt>   <tplyr_mt>       <tplyr_mt>      
+#>  6 d3_2   "Week 24"    Median (R… <tplyr_mt>   <tplyr_mt>       <tplyr_mt>      
+#>  7 d1_3   "Change fro… n          <tplyr_mt>   <tplyr_mt>       <tplyr_mt>      
+#>  8 d2_3   "Change fro… Mean (SD)  <tplyr_mt>   <tplyr_mt>       <tplyr_mt>      
+#>  9 d3_3   "Change fro… Median (R… <tplyr_mt>   <tplyr_mt>       <tplyr_mt>      
+#> 10 x4_1   "p-value(Do… NA         <NULL>       <tplyr_mt>       <NULL>          
+#> 11 x4_3   "p-value(Xa… NA         <NULL>       <tplyr_mt>       <tplyr_mt>      
+#> 12 x4_4   "   Diff of… NA         <NULL>       <tplyr_mt>       <tplyr_mt>      
+#> 13 x4_5   "   95% CI"  NA         <NULL>       <tplyr_mt>       <tplyr_mt>      
+#> 14 x4_7   "p-value(Xa… NA         <NULL>       <tplyr_mt>       <NULL>          
+#> 15 x4_8   "   Diff of… NA         <NULL>       <tplyr_mt>       <NULL>          
+#> 16 x4_9   "   95% CI"  NA         <NULL>       <tplyr_mt>       <NULL>
+

Finally, as with the automatically created metadata from Tplyr, we +can query these result cells just the same:

+
+get_meta_subset(t, 'x4_1', "var1_Xanomeline High Dose") %>% 
+  head() %>% 
+  kable()
+ ++++++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
USUBJIDTRTPEFFFLITTFLANL01FLSITEGR1AVISITAVISITNPARAMCDAVALBASECHG
01-701-1015PlaceboYYY701Week 2424ACTOT813-5
01-701-1023PlaceboYYY701Week 2424ACTOT1213-1
01-701-1028Xanomeline High DoseYYY701Week 2424ACTOT330
01-701-1033Xanomeline Low DoseYYY701Week 2424ACTOT770
01-701-1034Xanomeline High DoseYYY701Week 2424ACTOT11110
01-701-1047PlaceboYYY701Week 2424ACTOT19109
+
+
+

Metadata Without Tplyr +

+

You very well may have a scenario where you want to use these +metadata functions outside of Tplyr in general. As such, there are S3 +methods available to query metadata from a dataframe instead of a Tplyr +table, and parameters to provide your own target data frame:

+
+get_meta_subset(eff_meta, 'x4_1', "var1_Xanomeline High Dose", target=adas) %>% 
+  head() %>% 
+  kable()
+ ++++++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
USUBJIDTRTPEFFFLITTFLANL01FLSITEGR1AVISITAVISITNPARAMCDAVALBASECHG
01-701-1015PlaceboYYY701Week 2424ACTOT813-5
01-701-1023PlaceboYYY701Week 2424ACTOT1213-1
01-701-1028Xanomeline High DoseYYY701Week 2424ACTOT330
01-701-1033Xanomeline Low DoseYYY701Week 2424ACTOT770
01-701-1034Xanomeline High DoseYYY701Week 2424ACTOT11110
01-701-1047PlaceboYYY701Week 2424ACTOT19109
+

As with the Tplyr metadata, the only strict criterion here is that your custom metadata have a row_id column.

+
+
+

Tying it Together +

+

The vignette wouldn’t be complete without the final contextual example - so here we go. Ultimately these pieces can all fit together in the context of a Shiny application and give you the desired click-through experience.

+ +

Source code available here

+
+
+
+ + + +
+ + + +
+
diff --git a/docs/articles/denom.html b/docs/articles/denom.html
index d2fd95b8..3b1420b7 100644
Totals, Missings, and Denominators • Tplyr
@@ -1322,37 +1365,66 @@

-

Our hope is that this gives you the flexibility you need to structure your denominator however required.

+

Our hope is that this gives you the flexibility you need to structure +your denominator however required.

-
-

-Controlling the Denominator Filter

-

There are some circumstances that you’ll encounter where the filter used for a denominator needs to be different than the filter used to count. Disposition tables are an example of this, and we’ll use that example to paint this picture.

-

‘Tplyr’ offers you the ability to specifically control the filter used within the denominator. This is provided through the function set_denom_where(). The default for set_denom_where() is the layer level where parameter, if one was supplied. set_denom_where() allows you to replace this layer level filter with a custom filter of your choosing. This is done on top of any filtering specified in the tplyr_table() where parameter - which means that the set_denom_where() filter is applied in addition to any table level filtering.

-

Yeah we know - there are a lot of different places that filtering can happen…

-

So let’s take the example shown below. The first layer has no layer level filtering applied, so the table level where is the only filter applied. The second layer has a layer level filter applied, so the denominators will be based on that layer level filter. Notice how in this case, the percentages in the second layer add up to 100%. This is because the denominator only includes values used in that layer.

-

The third layer has a layer level filter applied, but additionally uses set_denom_where(). The set_denom_where() in this example is actually removing the layer level filter for the denominators. This is because in R, when you filter using TRUE, the filter returns all records. So by using TRUE in set_denom_where(), the layer level filter is effectively removed. This causes the denominator to include all values available from the table and not just those selected for that layer - so for this layer, the percentages will not add up to 100%. This is important - this allows the percentages from Layer 3 to sum to the total percentage of “DISCONTINUED” from Layer 1.

+
+

Controlling the Denominator Filter +

+

There are some circumstances that you’ll encounter where the filter +used for a denominator needs to be different than the filter used to +count. Disposition tables are an example of this, and we’ll use that +example to paint this picture.

+

‘Tplyr’ offers you the ability to specifically control the filter +used within the denominator. This is provided through the function +set_denom_where(). The default for +set_denom_where() is the layer level where +parameter, if one was supplied. set_denom_where() allows +you to replace this layer level filter with a custom filter of your +choosing. This is done on top of any filtering specified in the +tplyr_table() where parameter - which means that the +set_denom_where() filter is applied in addition to +any table level filtering.

+

Yeah we know - there are a lot of different places that filtering can +happen…

+

So let’s take the example shown below. The first layer has no layer +level filtering applied, so the table level where is the +only filter applied. The second layer has a layer level filter applied, +so the denominators will be based on that layer level filter. Notice how +in this case, the percentages in the second layer add up to 100%. This +is because the denominator only includes values used in that layer.

+

The third layer has a layer level filter applied, but additionally +uses set_denom_where(). The set_denom_where() +in this example is actually removing the layer level filter for +the denominators. This is because in R, when you filter using +TRUE, the filter returns all records. So by using +TRUE in set_denom_where(), the layer level +filter is effectively removed. This causes the denominator to include +all values available from the table and not just those selected for that +layer - so for this layer, the percentages will not add up to +100%. This is important - this allows the percentages from Layer 3 +to sum to the total percentage of “DISCONTINUED” from Layer 1.

-adsl2 <- adsl %>% 
-  mutate(DISCONTEXT = if_else(DISCONFL == 'Y', 'DISCONTINUED', 'COMPLETED'))
-
-t <- tplyr_table(adsl2, TRT01P, where = SAFFL == 'Y') %>%
-  add_layer(
-    group_count(DISCONTEXT)
-  ) %>%
-  add_layer(
-    group_count(DCSREAS, where = DISCONFL == 'Y')
-  ) %>%
-  add_layer(
-    group_count(DCSREAS, where = DISCONFL == 'Y') %>% 
-    set_denom_where(TRUE)
-  ) %>%
-  build() %>%
-  arrange(ord_layer_index, ord_layer_1) 
-
-t %>% 
-  kable()
+adsl2 <- adsl %>% + mutate(DISCONTEXT = if_else(DISCONFL == 'Y', 'DISCONTINUED', 'COMPLETED')) + +t <- tplyr_table(adsl2, TRT01P, where = SAFFL == 'Y') %>% + add_layer( + group_count(DISCONTEXT) + ) %>% + add_layer( + group_count(DCSREAS, where = DISCONFL == 'Y') + ) %>% + add_layer( + group_count(DCSREAS, where = DISCONFL == 'Y') %>% + set_denom_where(TRUE) + ) %>% + build() %>% + arrange(ord_layer_index, ord_layer_1) + +t %>% + kable()
@@ -1534,27 +1606,48 @@

-
-

-Missing Counts

-

Missing counts are a tricky area for frequency tables, and they play directly in with denominators as well. These values raise a number of questions. For example, do you want to format the missing counts the same way as the event counts? Do you want to present missing counts with percentages? Do missing counts belong in the denominator?

-

The set_missing_count() function can take a new f_str() object to set the display of missing values. If not specified, the associated count layer’s format will be used. Using the ... parameter, you are able to specify the row label desired for missing values and values that you determine to be considered ‘missing’. For example, you may have NA values in the target variable, and then values like “Not Collected” that you also wish to consider “missing”. set_missing_count() allows you to group those together. Actually - you’re able to establish as many different “missing” groups as you want - even though that scenario is fairly unlikely.

-

In the example below 50 random values are removed and NA is specified as the missing string. This leads us to another parameter - denom_ignore. By default, if you specify missing values they will still be considered within the denominator, but when you have missing counts, you may wish to exclude them from the totals being summarized. By setting denom_ignore to TRUE, your denominators will ignore any groups of missing values that you’ve specified.

+
+

Missing Counts +

+

Missing counts are a tricky area for frequency tables, and they play +directly in with denominators as well. These values raise a number of +questions. For example, do you want to format the missing counts the +same way as the event counts? Do you want to present missing counts with +percentages? Do missing counts belong in the denominator?

+

The set_missing_count() function can take a new +f_str() object to set the display of missing values. If not +specified, the associated count layer’s format will be used. Using the +... parameter, you are able to specify the row label +desired for missing values and values that you determine to be +considered ‘missing’. For example, you may have NA values in the target +variable, and then values like “Not Collected” that you also wish to +consider “missing”. set_missing_count() allows you to group +those together. Actually - you’re able to establish as many different +“missing” groups as you want - even though that scenario is fairly +unlikely.

+

In the example below 50 random values are removed and NA is specified +as the missing string. This leads us to another parameter - +denom_ignore. By default, if you specify missing values +they will still be considered within the denominator, but when you have +missing counts, you may wish to exclude them from the totals being +summarized. By setting denom_ignore to TRUE, your +denominators will ignore any groups of missing values that you’ve +specified.

-adae2 <- adae
-adae2[sample(nrow(adae2), 50), "AESEV"] <- NA
-
-t <- tplyr_table(adae2, TRTA) %>%
-  add_layer(
-    group_count(AESEV) %>%
-      set_format_strings(f_str("xxx (xx.xx%)", n, pct)) %>%
-      set_missing_count(f_str("xxx", n), sort_value=Inf, denom_ignore=TRUE, Missing = NA)
-  ) %>%
-  build() %>% 
-  arrange(ord_layer_1)
-
-t %>% 
-  kable()
+adae2 <- adae +adae2[sample(nrow(adae2), 50), "AESEV"] <- NA + +t <- tplyr_table(adae2, TRTA) %>% + add_layer( + group_count(AESEV) %>% + set_format_strings(f_str("xxx (xx.xx%)", n, pct)) %>% + set_missing_count(f_str("xxx", n), sort_value=Inf, denom_ignore=TRUE, Missing = NA) + ) %>% + build() %>% + arrange(ord_layer_1) + +t %>% + kable()
@@ -1575,67 +1668,104 @@

- - - + + + - - - + + + - - + + - - - + + +
MILD23 (62.16%)67 (75.28%)47 (47.00%)23 (54.76%)70 (77.78%)44 (46.81%) 1 1
MODERATE14 (37.84%)21 (23.60%)49 (49.00%)19 (45.24%)19 (21.11%)46 (48.94%) 1 2
SEVERE 0 ( 0.00%)1 ( 1.12%)4 ( 4.00%)1 ( 1.11%)4 ( 4.26%) 1 3
Missing10221852124 1 Inf
-

We did one more other thing worth explaining in the example above - gave the missing count its own sort value. If you leave this field null, it will simply be the maximum value in the order layer plus 1, to put the Missing counts at the bottom during an ascending sort. But tables can be sorted a lot of different ways, as you’ll see in the sort vignette. So instead of trying to come up with novel ways for you to control where the missing row goes - we decided to just let you specify your own value.

+

We did one other thing worth explaining in the example above - we gave the missing count its own sort value. If you leave this field null, it will simply be the maximum value in the order layer plus 1, to put the Missing counts at the bottom during an ascending sort. But tables can be sorted a lot of different ways, as you’ll see in the sort vignette. So instead of trying to come up with novel ways for you to control where the missing row goes - we decided to just let you specify your own value.

-
-

-Adding a ‘Total’ Row

-

In addition to missing counts, some summaries require the addition of a ‘Total’ row. ‘Tplyr’ has the helper function add_total_row() to ease this process for you. Like most other things within ‘Tplyr’ - particularly in this vignette - this too has a significant bit of nuance to it.

-

Much of this functionality is similar to set_missing_count(). You’re able to specify a different format for the total, but if not specified, the associated count layer’s format will be used. You’re able to set your own sort value to specify where you want the total row to sit.

+
+

Adding a ‘Total’ Row +

+

In addition to missing counts, some summaries require the addition of +a ‘Total’ row. ‘Tplyr’ has the helper function +add_total_row() to ease this process for you. Like most +other things within ‘Tplyr’ - particularly in this vignette - this too +has a significant bit of nuance to it.

+

Much of this functionality is similar to +set_missing_count(). You’re able to specify a different +format for the total, but if not specified, the associated count layer’s +format will be used. You’re able to set your own sort value to specify +where you want the total row to sit.

More nuance comes in two places:

    -
  • By default, add_total_row() will count missing values, but you can exclude those values using the count_missings parameter. ‘Tplyr’ will warn you when set_count_missing() has denom_ignore set to TRUE, add_total_row() has count_missings set to TRUE and the format contains a percentage. Why? Because if the denominator is ignoring missing values but you’re still counting them in your total, the percentage shown can exceed 100%.
  • +
  • By default, add_total_row() will count missing values, but you can exclude those values using the count_missings parameter. ‘Tplyr’ will warn you when set_missing_count() has denom_ignore set to TRUE, add_total_row() has count_missings set to TRUE, and the format contains a percentage. Why? Because if the denominator is ignoring missing values but you’re still counting them in your total, the percentage shown can exceed 100%.
  • -add_total_row() will throw a warning when a by variable is used, because it becomes ambiguous what total should be calculated. You can rectify this by using set_denoms_by(), which allows the user to control exactly which groups are used to form the denominator. This way the totals presented by add_total_row() will align with denominators specified in set_denom_by() and generate total rows that match the grouping of your denominator values.
  • add_total_row() will throw a warning when a by variable is used, because it becomes ambiguous what total should be calculated. You can rectify this by using set_denoms_by(), which allows the user to control exactly which groups are used to form the denominator. This way the totals presented by add_total_row() will align with the denominators specified in set_denoms_by() and generate total rows that match the grouping of your denominator values.
-

In the example below, we summarize age groups by sex. The denominators are determined by treatment group and sex, and since we are not excluding any values from the denominator, the total row ends up matching the denominator that was used. The ‘Missing’ row tells us the number of missing values, but because count_missings is set to TRUE, the missing counts are included in the total row. This probably isn’t how you would choose to display things, but here we’re trying to show the flexibility built into ‘Tplyr’.

+

In the example below, we summarize age groups by sex. The +denominators are determined by treatment group and sex, and since we are +not excluding any values from the denominator, the total row ends up +matching the denominator that was used. The ‘Missing’ row tells us the +number of missing values, but because count_missings is set +to TRUE, the missing counts are included in the total row. +This probably isn’t how you would choose to display things, but here +we’re trying to show the flexibility built into ‘Tplyr’.

-adsl2 <- adsl
-adsl2[sample(nrow(adsl2), 50), "AGEGR1"] <- NA
-
-tplyr_table(adsl2, TRT01P) %>% 
-  add_layer(
-    group_count(AGEGR1, by=SEX) %>% 
-      set_denoms_by(TRT01P, SEX) %>%  # This gives me a Total row each group
-      add_total_row(f_str("xxx", n), count_missings=TRUE, sort_value=-Inf) %>% 
-      set_total_row_label("All Age Groups") %>% 
-      set_missing_count(f_str("xx (xx.x%)", n, pct), Missing = NA, sort_value=Inf)
-  ) %>% 
-  build() %>% 
-  arrange(ord_layer_1, ord_layer_2) %>% 
-  kable()
+adsl2 <- adsl +adsl2[sample(nrow(adsl2), 50), "AGEGR1"] <- NA + +tplyr_table(adsl2, TRT01P) %>% + add_layer( + group_count(AGEGR1, by=SEX) %>% + set_denoms_by(TRT01P, SEX) %>% # This gives me a Total row each group + add_total_row(f_str("xxx", n), count_missings=TRUE, sort_value=-Inf) %>% + set_total_row_label("All Age Groups") %>% + set_missing_count(f_str("xx (xx.x%)", n, pct), Missing = NA, sort_value=Inf) + ) %>% + build() %>% + arrange(ord_layer_1, ord_layer_2) %>% + kable()
@@ -1671,9 +1801,9 @@

- + - + @@ -1681,9 +1811,9 @@

- + - + @@ -1693,7 +1823,7 @@

- + @@ -1701,9 +1831,9 @@

- + - + @@ -1721,9 +1851,9 @@

- + - + @@ -1741,8 +1871,8 @@

- - + + @@ -1751,29 +1881,34 @@

- - - + + +
F <655 ( 9.4%)9 ( 17.0%) 5 ( 12.5%)4 ( 8.0%)5 ( 10.0%) 1 1 1
F >8018 ( 34.0%)15 ( 28.3%) 6 ( 15.0%)13 ( 26.0%)12 ( 24.0%) 1 1 265-80 18 ( 34.0%) 23 ( 57.5%)26 ( 52.0%)24 ( 48.0%) 1 1 3
F Missing12 (22.6%)11 (20.8%) 6 (15.0%)7 (14.0%)9 (18.0%) 1 1 Inf
M <655 ( 15.2%)4 ( 12.1%) 3 ( 6.8%)2 ( 5.9%)3 ( 8.8%) 1 2 1
M 65-8016 ( 48.5%)20 ( 45.5%)15 ( 45.5%)22 ( 50.0%) 16 ( 47.1%) 1 2
M Missing6 (18.2%)13 (29.5%)6 (17.6%)8 (24.2%)11 (25.0%)5 (14.7%) 1 2 Inf
-

The default text for the Total row is “Total”, but we provide set_total_row_label() to allow you to customize the text used in your display.

-

Let’s look at a more practical version of the table above. If you display missings, you probably want to exclude them from the total. Here we do that using set_missing_count(). So more commonly, you’ll see this:

+

The default text for the Total row is “Total”, but we provide +set_total_row_label() to allow you to customize the text +used in your display.

+

Let’s look at a more practical version of the table above. If you +display missings, you probably want to exclude them from the total. Here +we do that using set_missing_count(). So more commonly, +you’ll see this:

-tplyr_table(adsl2, TRT01P) %>% 
-  add_layer(
-    group_count(AGEGR1, by=SEX) %>% 
-      set_denoms_by(TRT01P, SEX) %>%  # This gives me a Total row each group
-      add_total_row(f_str("xxx", n), count_missings=FALSE, sort_value=-Inf) %>% 
-      set_total_row_label("All Age Groups") %>% 
-      set_missing_count(f_str("xxx", n), Missing = NA, sort_value=Inf, denom_ignore=TRUE)
-  ) %>% 
-  build() %>% 
-  arrange(ord_layer_1, ord_layer_2) %>% 
-  kable()
+tplyr_table(adsl2, TRT01P) %>% + add_layer( + group_count(AGEGR1, by=SEX) %>% + set_denoms_by(TRT01P, SEX) %>% # This gives me a Total row each group + add_total_row(f_str("xxx", n), count_missings=FALSE, sort_value=-Inf) %>% + set_total_row_label("All Age Groups") %>% + set_missing_count(f_str("xxx", n), Missing = NA, sort_value=Inf, denom_ignore=TRUE) + ) %>% + build() %>% + arrange(ord_layer_1, ord_layer_2) %>% + kable()
@@ -1799,9 +1934,9 @@

- + - + @@ -1809,9 +1944,9 @@

- + - + @@ -1819,9 +1954,9 @@

- + - + @@ -1829,9 +1964,9 @@

- + - + @@ -1839,9 +1974,9 @@

- + - + @@ -1849,9 +1984,9 @@

- - - + + + @@ -1859,9 +1994,9 @@

- - - + + + @@ -1869,9 +2004,9 @@

- - - + + + @@ -1879,9 +2014,9 @@

- - - + + + @@ -1889,43 +2024,48 @@

- - - + + +
F All Age Groups4142 344341 1 1 -Inf
F <655 ( 12.2%)9 ( 21.4%) 5 ( 14.7%)4 ( 9.3%)5 ( 12.2%) 1 1 1
F >8018 ( 43.9%)15 ( 35.7%) 6 ( 17.6%)13 ( 30.2%)12 ( 29.3%) 1 1 2
F 65-8018 ( 43.9%)18 ( 42.9%) 23 ( 67.6%)26 ( 60.5%)24 ( 58.5%) 1 1 3
F Missing1211 679 1 1 Inf
M All Age Groups273128253329 1 2 -Inf
M <655 ( 18.5%)3 ( 9.7%)2 ( 7.1%)4 ( 16.0%)3 ( 9.1%)3 ( 10.3%) 1 2 1
M >806 ( 22.2%)8 ( 25.8%)10 ( 35.7%)6 ( 24.0%)8 ( 24.2%)10 ( 34.5%) 1 2 2
M 65-8016 ( 59.3%)20 ( 64.5%)16 ( 57.1%)15 ( 60.0%)22 ( 66.7%)16 ( 55.2%) 1 2 3
M Missing61368115 1 2 Inf
-

Now the table is more intuitive. We used set_missing_count() to update our denominators, so missing have been excluded. Now, the total row intuitively matches the denominators used within each group, and we can see how many missing records were excluded.

+

Now the table is more intuitive. We used +set_missing_count() to update our denominators, so missing +have been excluded. Now, the total row intuitively matches the +denominators used within each group, and we can see how many missing +records were excluded.

+

You may have stumbled upon this portion of the vignette while searching for how to create a total column. Tplyr allows you to do this as well with the function add_total_group() - you can read more in vignette("table").
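As a quick sketch of that - reusing the adsl2 data from above, with an illustrative layer:

# A sketch of a total column via add_total_group() (assumes adsl2 from above)
tplyr_table(adsl2, TRT01P) %>%
  add_total_group() %>%        # adds a "Total" treatment group column
  add_layer(
    group_count(AGEGR1, by = SEX)
  ) %>%
  build() %>%
  head() %>%
  kable()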

And that’s it for denominators! Happy counting!

-
- - - +
-
diff --git a/docs/articles/desc.html b/docs/articles/desc.html
index b9632f3d..7e5845ea 100644
Descriptive Statistic Layers • Tplyr
-

-Formatting

-

A lot of the nuance to formatting descriptive statistics layers has already been covered above, but there are a couple more tricks to getting the most out of ‘Tplyr’. One of these tricks is filling empty values.

-

By default, if there is no available value for a summary in a particular observation, the result being presented will be blanked out.

+
+

Formatting +

+

A lot of the nuance to formatting descriptive statistics layers has +already been covered above, but there are a couple more tricks to +getting the most out of ‘Tplyr’. One of these tricks is filling empty +values.

+

By default, if there is no available value for a summary in a +particular observation, the result being presented will be blanked +out.

+

(Note: Tplyr generally respects factor levels - so in instances of a missing row or column group, if the factor level is present, then the variable or row will still be generated.)

-adlb_2 <- adlb %>% 
-  filter(TRTA != "Placebo")
-
-tplyr_table(adlb_2, TRTA) %>% 
-  set_pop_data(adsl) %>% 
-  set_pop_treat_var(TRT01P) %>% 
-  add_layer(
-    group_desc(AVAL, by=PARAMCD) %>% 
-      set_format_strings('Mean (SD)' = f_str('xxx (xxx)', mean, sd))
-  ) %>% 
-  build() %>% 
-  head() %>% 
-  select(-starts_with("ord")) %>% 
-  kable()
+adsl$TRT01P <- as.factor(adsl$TRT01P) +adlb$TRTA <- as.factor(adlb$TRTA) + +adlb_2 <- adlb %>% + filter(TRTA != "Placebo") + +tplyr_table(adlb_2, TRTA) %>% + set_pop_data(adsl) %>% + set_pop_treat_var(TRT01P) %>% + add_layer( + group_desc(AVAL, by=PARAMCD) %>% + set_format_strings('Mean (SD)' = f_str('xxx (xxx)', mean, sd)) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable()
+++++++ + @@ -561,55 +656,69 @@

+ + + + +
row_label1 row_label2 var1_Xanomeline High Dose var1_Xanomeline Low Dosevar1_Placebo
Mean (SD) 5 ( 1) 6 ( 3)
CA Mean (SD) 2 ( 0) 2 ( 0)
CK Mean (SD) 64 ( 94) 58 ( 78)
GGT Mean (SD) 17 ( 49) 21 ( 27)
URATE Mean (SD) 271 ( 88) 231 ( 87)
-

Note how the entire example above has all records in var1_Placebo missing. ‘Tplyr’ gives you control over how you fill this space. Let’s say that we wanted instead to make that space say “Missing”. You can control this with the f_str() object using the empty parameter

+

Note how the entire example above has all records in +var1_Placebo missing. ‘Tplyr’ gives you control over how +you fill this space. Let’s say that we wanted instead to make that space +say “Missing”. You can control this with the f_str() object +using the empty parameter.

-adlb_2 <- adlb %>% 
-  filter(TRTA != "Placebo")
-
-tplyr_table(adlb_2, TRTA) %>% 
-  set_pop_data(adsl) %>% 
-  set_pop_treat_var(TRT01P) %>% 
-  add_layer(
-    group_desc(AVAL, by=PARAMCD) %>% 
-      set_format_strings('Mean (SD)' = f_str('xxx.xx (xxx.xxx)', mean, sd, empty=c(.overall="MISSING")))
-  ) %>% 
-  build() %>% 
-  head() %>% 
-  select(-starts_with("ord")) %>% 
-  kable()
+tplyr_table(adlb_2, TRTA) %>% + set_pop_data(adsl) %>% + set_pop_treat_var(TRT01P) %>% + add_layer( + group_desc(AVAL, by=PARAMCD) %>% + set_format_strings('Mean (SD)' = f_str('xxx.xx (xxx.xxx)', mean, sd, empty=c(.overall="MISSING"))) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable()
+++++++ + @@ -617,55 +726,70 @@

+ + + + +
row_label1 row_label2 var1_Xanomeline High Dose var1_Xanomeline Low Dosevar1_Placebo
Mean (SD) 4.57 ( 1.301) 5.71 ( 2.940)MISSING
CA Mean (SD) 2.19 ( 0.137) 2.15 ( 0.083)MISSING
CK Mean (SD) 64.25 ( 93.986) 58.33 ( 77.915)MISSING
GGT Mean (SD) 16.75 ( 48.692) 21.33 ( 26.989)MISSING
URATE Mean (SD) 271.23 ( 88.161) 230.98 ( 87.006)MISSING
-

Look at the empty parameter above. Here, we use a named character vector, where the name is .overall. When this name is used, if all elements within the cell are missing, they will be filled with the specified text. Otherwise, the provided string will fill just the missing parameter. In some cases, this may not be what you’d like to see. Perhaps we want a string that fills each missing space.

+

Look at the empty parameter above. Here, we use a named +character vector, where the name is .overall. When this +name is used, if all elements within the cell are missing, they will be +filled with the specified text. Otherwise, the provided string will fill +just the missing parameter. In some cases, this may not be what you’d +like to see. Perhaps we want a string that fills each missing space.

-adlb_2 <- adlb %>% 
-  filter(TRTA != "Placebo")
-
-tplyr_table(adlb_2, TRTA) %>% 
-  set_pop_data(adsl) %>% 
-  set_pop_treat_var(TRT01P) %>% 
-  add_layer(
-    group_desc(AVAL, by=PARAMCD) %>% 
-      set_format_strings('Mean (SD)' = f_str('xxx.xx (xxx.xxx)', mean, sd, empty=c("NA")))
-  ) %>% 
-  build() %>% 
-  head() %>%
-  select(-starts_with("ord")) %>%
-  kable()
+tplyr_table(adlb_2, TRTA) %>% + set_pop_data(adsl) %>% + set_pop_treat_var(TRT01P) %>% + add_layer( + group_desc(AVAL, by=PARAMCD) %>% + set_format_strings('Mean (SD)' = f_str('xxx.xx (xxx.xxx)', mean, sd, empty=c("NA"))) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable()
+++++++ + @@ -673,61 +797,81 @@

+ + + + +
row_label1 row_label2 var1_Xanomeline High Dose var1_Xanomeline Low Dosevar1_Placebo
Mean (SD) 4.57 ( 1.301) 5.71 ( 2.940)NA ( NA)
CA Mean (SD) 2.19 ( 0.137) 2.15 ( 0.083)NA ( NA)
CK Mean (SD) 64.25 ( 93.986) 58.33 ( 77.915)NA ( NA)
GGT Mean (SD) 16.75 ( 48.692) 21.33 ( 26.989)NA ( NA)
URATE Mean (SD) 271.23 ( 88.161) 230.98 ( 87.006)NA ( NA)
-

In the example above, instead of filling the whole space, the empty text of “NA” replaces the empty value for each element. So for ‘Mean (SD)’, we now have ‘NA ( NA)’. Note that the proper padding was still used for ‘NA’ to make sure the parentheses still align with populated records.

-
-

-Auto Precision

-

You may have noticed that the approach to formatting covered so far leaves a lot to be desired. Consider analyzing lab results, where you may want precision to vary based on the collected precision of the tests. Furthermore, depending on the summary being presented, you may wish to increase the precision further. For example, you may want the mean to be at collected precision +1 decimal place, and for standard deviation +2.

-

‘Tplyr’ has this covered using auto-precision. Auto-precision allows you to format your numeric summaries based on the precision of the data collected. This has all been built into the format strings, because a natural place to specify your desired format is where you specify how you want your data presented. If you wish to use auto-precision, use a instead of x when creating your summaries. Note that only one a is needed on each side of a decimal. To use increased precision, use a+n where n is the number of additional spaces you wish to add.

+

In the example above, instead of filling the whole space, the +empty text of “NA” replaces the empty value for each +element. So for ‘Mean (SD)’, we now have ‘NA ( NA)’. Note that the +proper padding was still used for ‘NA’ to make sure the parentheses +still align with populated records.

+
+

Auto Precision +

+

You may have noticed that the approach to formatting covered so far +leaves a lot to be desired. Consider analyzing lab results, where you +may want precision to vary based on the collected precision of the +tests. Furthermore, depending on the summary being presented, you may +wish to increase the precision further. For example, you may want the +mean to be at collected precision +1 decimal place, and for standard +deviation +2.

+

‘Tplyr’ has this covered using auto-precision. Auto-precision allows +you to format your numeric summaries based on the precision of the data +collected. This has all been built into the format strings, because a +natural place to specify your desired format is where you specify how +you want your data presented. If you wish to use auto-precision, use +a instead of x when creating your summaries. +Note that only one a is needed on each side of a decimal. +To use increased precision, use a+n where n is +the number of additional spaces you wish to add.

-
-tplyr_table(adlb, TRTA) %>% 
-  add_layer(
-    group_desc(AVAL, by = PARAMCD) %>% 
-      set_format_strings(
-        'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd)
-      )
-  ) %>% 
-  build() %>% 
-  head(20) %>% 
-  kable()
+tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(AVAL, by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd) + ) + ) %>% + build() %>% + head(20) %>% + select(-starts_with("ord")) %>% + kable()
-------++++ @@ -735,9 +879,6 @@

- - - @@ -746,9 +887,6 @@

- - - @@ -756,9 +894,6 @@

- - - @@ -766,9 +901,6 @@

- - - @@ -776,9 +908,6 @@

- - - @@ -786,36 +915,41 @@

- - -
row_label1var1_Placebo var1_Xanomeline High Dose var1_Xanomeline Low Doseord_layer_indexord_layer_1ord_layer_2
4.7430 ( 2.05463) 4.5696 ( 1.30148) 5.7120 ( 2.94018)111
CA2.165660 (0.0692494) 2.189362 (0.1372011) 2.145700 (0.0830867)121
CK72.4 ( 288.41) 64.2 ( 93.99) 58.3 ( 77.91)131
GGT17.8 ( 34.77) 16.8 ( 48.69) 21.3 ( 26.99)141
URATE235.9373 ( 83.69662) 271.2288 ( 88.16093) 230.9807 ( 87.00646)151
-

As you can see, the decimal precision is now varying depending on the test being performed. Notice that both the integer and the decimal side of each number fluctuate as well. Tpylr collects both the integer and decimal precision, and you can specify both separately. For example, you could use x’s to specify a default number of spaces for your integers that are used consistently across by variables, but vary the decimal precision based on collected data. You can also increment the number of spaces for both integer and decimal separately.

-

But - this is kind of ugly, isn’t it? Do we really need all 6 decimal places collected for CA? For this reason, you’re able to set a cap on the precision that’s displayed:

+

As you can see, the decimal precision is now varying depending on the test being performed. Notice that both the integer and the decimal side of each number fluctuate as well. Tplyr collects both the integer and decimal precision, and you can specify both separately. For example, you could use x’s to specify a default number of spaces for your integers that are used consistently across by variables, but vary the decimal precision based on collected data. You can also increment the number of spaces for both integer and decimal separately.
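As a sketch of that mixed approach - assuming, as described above, that ‘x’ and ‘a’ can be combined within a single format string - the integer width below is fixed at three spaces while the decimal precision follows the data:

tplyr_table(adlb, TRTA) %>%
  add_layer(
    group_desc(AVAL, by = PARAMCD) %>%
      set_format_strings(
        # Fixed 3-space integer width, data-driven decimal precision
        'Mean (SD)' = f_str('xxx.a+1 (xxx.a+2)', mean, sd)
      )
  ) %>%
  build() %>%
  head() %>%
  select(-starts_with("ord")) %>%
  kable()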

+

But - this is kind of ugly, isn’t it? Do we really need all 6 decimal +places collected for CA? For this reason, you’re able to set a cap on +the precision that’s displayed:

-tplyr_table(adlb, TRTA) %>% 
-  add_layer(
-    group_desc(AVAL, by = PARAMCD) %>% 
-      set_format_strings(
-        'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd),
-        cap = c(int=3, dec=2)
-      )
-  ) %>% 
-  build() %>% 
-  head(20) %>% 
-  kable()
- +tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(AVAL, by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd), + cap = c(int=3, dec=2) + ) + ) %>% + build() %>% + head(20) %>% + select(-starts_with("ord")) %>% + kable() +
--------+++++ @@ -823,9 +957,6 @@

- - - @@ -834,9 +965,6 @@

- - - @@ -844,9 +972,6 @@

- - - @@ -854,9 +979,6 @@

- - - @@ -864,9 +986,6 @@

- - - @@ -874,44 +993,56 @@

- - -
row_label1var1_Placebo var1_Xanomeline High Dose var1_Xanomeline Low Doseord_layer_indexord_layer_1ord_layer_2
4.743 ( 2.0546) 4.570 ( 1.3015) 5.712 ( 2.9402)111
CA2.166 (0.0692) 2.189 (0.1372) 2.146 (0.0831)121
CK72.4 (288.41) 64.2 ( 93.99) 58.3 ( 77.91)131
GGT17.8 ( 34.77) 16.8 ( 48.69) 21.3 ( 26.99)141
URATE235.937 ( 83.6966) 271.229 ( 88.1609) 230.981 ( 87.0065)151
-

Now that looks better. The cap argument is part of set_format_strings(). You need to specify the integer and decimal caps separately. Note that integer precision works slightly differently than decimal precision. Integer precision relates to the length allotted for the left side of a decimal, but integers will not truncate. When using ‘x’ formatting, if an integer exceeds the set length, it will push the number over. If the integer side of auto-precision is not capped, the necessary length for an integer in the associated by group will be as long as necessary. Decimals, on the other hand, round to the specified length. These caps apply to the length allotted for the “a” on either the integer or the decimal. So for example, if the decimal length is capped at 2 and the selected precision is “a+1”, then 3 decimal places will be allotted.

-

This was a basic situation, but if you’re paying close attention, you may have some questions. What if you have more by variables, like by visit AND test. Do we then calculate precision by visit and test? What if collected precision is different per visit and we don’t want that? What about multiple summary variables? How do we determine precision then? We have modifier functions for this:

+

Now that looks better. The cap argument is part of +set_format_strings(). You need to specify the integer and +decimal caps separately. Note that integer precision works slightly +differently than decimal precision. Integer precision relates to the +length allotted for the left side of a decimal, but integers will not +truncate. When using ‘x’ formatting, if an integer exceeds the set +length, it will push the number over. If the integer side of +auto-precision is not capped, the necessary length for an integer in the +associated by group will be as long as necessary. Decimals, on the other +hand, round to the specified length. These caps apply to the length +allotted for the “a” on either the integer or the decimal. So for +example, if the decimal length is capped at 2 and the selected precision +is “a+1”, then 3 decimal places will be allotted.

+

This was a basic situation, but if you’re paying close attention, you +may have some questions. What if you have more by variables, like by +visit AND test. Do we then calculate precision by visit and test? What +if collected precision is different per visit and we don’t want that? +What about multiple summary variables? How do we determine precision +then? We have modifier functions for this:

-tplyr_table(adlb, TRTA) %>% 
-  add_layer(
-    group_desc(vars(AVAL, CHG, BASE), by = PARAMCD) %>% 
-      set_format_strings(
-        'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA"),
-        cap = c(int=3, dec=2)
-      ) %>% 
-      set_precision_on(AVAL) %>% 
-      set_precision_by(PARAMCD)
-  ) %>%
-  build() %>% 
-  head() %>% 
-  kable()
- +tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(vars(AVAL, CHG, BASE), by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA"), + cap = c(int=3, dec=2) + ) %>% + set_precision_on(AVAL) %>% + set_precision_by(PARAMCD) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable() +
------------+++++++++ @@ -925,9 +1056,6 @@

- - - @@ -942,9 +1070,6 @@

- - - @@ -958,9 +1083,6 @@

- - - @@ -974,9 +1096,6 @@

- - - @@ -990,9 +1109,6 @@

- - - @@ -1006,40 +1122,217 @@

- - -
row_label1var3_Placebo var3_Xanomeline High Dose var3_Xanomeline Low Doseord_layer_indexord_layer_1ord_layer_2
5.457 ( 1.5661) 4.712 ( 1.6382) 6.497 ( 2.7940)111
CA2.290 (0.0923) 2.289 (0.0828) 2.304 (0.0719)121
CK83.4 ( 38.13) 84.8 ( 64.27) 72.3 ( 35.71)131
GGT24.2 ( 19.36) 18.8 ( 25.84) 22.7 ( 11.05)141
URATE272.617 ( 65.7021) 310.486 ( 61.8285) 273.608 ( 86.9470)151
-

Three variables are being summarized here - AVAL, CHG, and BASE. So which should be used for precision? set_precision_on() allows you to specify this, where the precision_on() variable must be one of the variables within target_var. Similarly, set_precision_by() changes the by variables used to determine collected precision. If no precision_on() variable is specified, the first variable in target_var is used. If no precision_by() variables are specified, then the default by variables are used.

+

Three variables are being summarized here - AVAL, CHG, and BASE. So +which should be used for precision? set_precision_on() +allows you to specify this, where the precision_on() +variable must be one of the variables within target_var. +Similarly, set_precision_by() changes the by +variables used to determine collected precision. If no +precision_on() variable is specified, the first variable in +target_var is used. If no precision_by +variables are specified, then the default by variables are +used.

+
+

External Precision +

+

Lastly, while dynamic precision might be what you’re looking for, you +may not want precision driven by the data. Perhaps there’s a company +standard that dictates what decimal precision should be used for each +separate lab test. Maybe even deeper down to the lab test and category. +New in Tplyr 1.0.0 we’ve added the ability to take decimal precision +from an external source.

+

The principle of external precision is exactly the same as auto-precision. The only difference is that you - the user - provide the precision table that Tplyr was automatically calculating in the background. This is done using the new function set_precision_data(). In the output below, notice how the precision by PARAMCD varies depending on what was specified in the data frame prec_data.

+
+prec_data <- tibble::tribble(
+  ~PARAMCD, ~max_int, ~max_dec,
+  "BUN",   1, 0,
+  "CA",    2, 4,
+  "CK",    3, 1,
+  "GGT",   3, 0,
+  "URATE", 3, 1,
+)
+  
+tplyr_table(adlb, TRTA) %>% 
+  add_layer(
+    group_desc(AVAL, by = PARAMCD) %>% 
+      set_format_strings(
+        'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA")
+      ) %>% 
+      set_precision_on(AVAL) %>% 
+      set_precision_by(PARAMCD) %>%
+      set_precision_data(prec_data)
+  ) %>%
+  build() %>% 
+  head() %>% 
+  select(-starts_with("ord")) %>%
+  kable()
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
row_label1row_label2var1_Placebovar1_Xanomeline High Dosevar1_Xanomeline Low Dose
BUNMean (SD)4.7 (2.05)4.6 (1.30)5.7 (2.94)
CAMean (SD)2.16566 ( 0.069249)2.18936 ( 0.137201)2.14570 ( 0.083087)
CKMean (SD)72.43 (288.405)64.25 ( 93.986)58.33 ( 77.915)
GGTMean (SD)17.8 ( 34.77)16.8 ( 48.69)21.3 ( 26.99)
URATEMean (SD)235.94 ( 83.697)271.23 ( 88.161)230.98 ( 87.006)
+

If one of your by variable groups is missing from the precision data, Tplyr can default back to using auto-precision by using the option default = "auto".

+
+prec_data <- tibble::tribble(
+  ~PARAMCD, ~max_int, ~max_dec,
+  "BUN", 1, 0,
+  "CA",  2, 4,
+  "CK",  3, 1,
+  "GGT", 3, 0,
+)
+  
+tplyr_table(adlb, TRTA) %>% 
+  add_layer(
+    group_desc(AVAL, by = PARAMCD) %>% 
+      set_format_strings(
+        'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA")
+      ) %>% 
+      set_precision_on(AVAL) %>% 
+      set_precision_by(PARAMCD) %>%
+      set_precision_data(prec_data, default="auto")
+  ) %>%
+  build() %>% 
+  head() %>% 
+  select(-starts_with("ord")) %>%
+  kable()
+#> Unhandled precision cases were found - calculating precision based on source data
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
row_label1row_label2var1_Placebovar1_Xanomeline High Dosevar1_Xanomeline Low Dose
BUNMean (SD)4.7 (2.05)4.6 (1.30)5.7 (2.94)
CAMean (SD)2.16566 ( 0.069249)2.18936 ( 0.137201)2.14570 ( 0.083087)
CKMean (SD)72.43 (288.405)64.25 ( 93.986)58.33 ( 77.915)
GGTMean (SD)17.8 ( 34.77)16.8 ( 48.69)21.3 ( 26.99)
URATEMean (SD)235.9373 ( 83.69662)271.2288 ( 88.16093)230.9807 ( 87.00646)
-
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 57be2783..a8dc9015 100644
Articles • Tplyr
Risk Difference
Sorting a Tplyr Table
Tplyr Options
Layer Templates
Producing a Styled Table
Totals, Missings, and Denominators
Getting Started

Using Metadata

Tplyr Metadata
Creating Custom Tplyr Metadata
diff --git a/docs/articles/metadata.html b/docs/articles/metadata.html
new file mode 100644
index 00000000..a638fe4a
Tplyr Metadata • Tplyr

Tplyr has a bit of a unique design, which might feel a bit weird as +you get used to the package. The process flow of building a +tplyr_table() object first, and then using +build() to construct the data frame is different than +programming in the tidyverse, or creating a ggplot. Why create the +tplyr_table() object first? Why is the +tplyr_table() object different than the resulting data +frame?

+

The purpose of the tplyr_table() object is to let Tplyr do more than just summarize data. As you build the table, all of the metadata around the table being built is maintained - the target variables being summarized, the grouping variables by row and column, and the filter conditions applied to the table and each layer. As a user, you provide this information to create the summary. But what about after the results are produced? Summarizing data inevitably leads to new questions. Within clinical summaries, you may want to know which subjects experienced an adverse event, or why the descriptive statistics for a particular visit’s lab summaries look abnormal. Normally, you’d write a query to recreate the data that led to that particular summary. Tplyr now allows you to immediately extract the input data or metadata that created an output result, thus providing traceability from the result back to the source.

+
+

Generating the Metadata +

+

Consider the following example:

+
+t <- tplyr_table(adsl, TRT01P, where = SAFFL == "Y") %>% 
+  add_layer(
+    group_count(RACE)
+  ) %>% 
+  add_layer(
+    group_desc(AGE, where = EFFFL == "Y")
+  )
+
+dat <- t %>% build(metadata=TRUE)
+
+kable(dat)
+ +++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
row_idrow_label1var1_Placebovar1_Xanomeline High Dosevar1_Xanomeline Low Doseord_layer_indexord_layer_1
c1_1AMERICAN INDIAN OR ALASKA NATIVE0 ( 0.0%)1 ( 1.2%)0 ( 0.0%)11
c2_1BLACK OR AFRICAN AMERICAN8 ( 9.3%)9 ( 10.7%)6 ( 7.1%)12
c3_1WHITE78 ( 90.7%)74 ( 88.1%)78 ( 92.9%)13
d1_2n79748121
d2_2Mean (SD)75.0 ( 8.43)73.9 ( 7.87)76.1 ( 8.02)22
d3_2Median76.075.578.023
d4_2Q1, Q369.5, 81.070.2, 79.071.0, 82.024
d5_2Min, Max52, 8856, 8851, 8825
d6_2Missing00026
+

To trigger the creation of metadata, the build() function has a new argument metadata. By specifying TRUE, the underlying metadata within Tplyr are prepared in an extractable format. This is the only step you need to take to make the metadata available.

+

When the metadata argument is used, a new column will be +produced in the output dataframe called row_id. The +row_id variable provides a persistent reference to a row of +interest, even if the output dataframe is sorted. If you review +vignette("styled-table"), note that we expect a certain +amount of post processing and styling of the built data frame from +Tplyr, to let you use whatever other packages you prefer. As such, this +reference ID is necessary.
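For instance, a small sketch using the dat built above - even after downstream sorting, the same result row can still be located by its row_id:

dat %>%
  arrange(desc(row_label1)) %>%    # any post-processing sort
  filter(row_id == "c2_1") %>%     # persistent reference to the same result row
  select(row_id, row_label1, var1_Placebo) %>%
  kable()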

+
+
+

Extracting The Input Source +

+

So, let’s cut to the chase. The most likely way you would use this metadata is to pull out the source data that created a cell. For this, we’ve provided the function get_meta_subset(). The only information that you need is the row_id and column name of the result cell of interest. For example, looking at the result above, what if we want to know which 8 subjects in the Placebo group were Black or African American:

+
+get_meta_subset(t, 'c2_1', 'var1_Placebo') %>% 
+  kable()
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
USUBJIDTRT01PSAFFLRACE
01-701-1203PlaceboYBLACK OR AFRICAN AMERICAN
01-701-1363PlaceboYBLACK OR AFRICAN AMERICAN
01-705-1282PlaceboYBLACK OR AFRICAN AMERICAN
01-706-1041PlaceboYBLACK OR AFRICAN AMERICAN
01-708-1286PlaceboYBLACK OR AFRICAN AMERICAN
01-708-1296PlaceboYBLACK OR AFRICAN AMERICAN
01-708-1378PlaceboYBLACK OR AFRICAN AMERICAN
01-711-1036PlaceboYBLACK OR AFRICAN AMERICAN
+

By using the row_id and column, the dataframe is pulled right out for us. Notice that USUBJID was included by default, even though there’s no reference anywhere in the tplyr_table() to the variable USUBJID. This is because get_meta_subset() has an additional argument add_cols that allows you to specify additional columns you want included in the resulting dataframe, and it has a default of USUBJID. So let’s say we additionally want to include the variable SEX.

+
+get_meta_subset(t, 'c2_1', 'var1_Placebo', add_cols = vars(USUBJID, SEX)) %>% 
+  kable()
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
USUBJIDSEXTRT01PSAFFLRACE
01-701-1203FPlaceboYBLACK OR AFRICAN AMERICAN
01-701-1363FPlaceboYBLACK OR AFRICAN AMERICAN
01-705-1282FPlaceboYBLACK OR AFRICAN AMERICAN
01-706-1041FPlaceboYBLACK OR AFRICAN AMERICAN
01-708-1286FPlaceboYBLACK OR AFRICAN AMERICAN
01-708-1296MPlaceboYBLACK OR AFRICAN AMERICAN
01-708-1378MPlaceboYBLACK OR AFRICAN AMERICAN
01-711-1036MPlaceboYBLACK OR AFRICAN AMERICAN
+

Variables should be provided using dplyr::vars(), just +like the cols argument on tplyr_table() and +the by arguments in each layer type.

+

As mentioned, the input source data can be extracted for any result +cell created by Tplyr. So let’s say we want to know the subjects +relevant for the descriptive statistics around age in the Xanomeline +High Dose group:

+
+get_meta_subset(t, 'd1_2', 'var1_Xanomeline High Dose') %>% 
+  head(10) %>% 
+  kable()
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
USUBJIDTRT01PEFFFLSAFFLAGE
01-701-1028Xanomeline High DoseYY71
01-701-1034Xanomeline High DoseYY77
01-701-1133Xanomeline High DoseYY81
01-701-1146Xanomeline High DoseYY75
01-701-1148Xanomeline High DoseYY57
01-701-1180Xanomeline High DoseYY56
01-701-1181Xanomeline High DoseYY79
01-701-1239Xanomeline High DoseYY56
01-701-1275Xanomeline High DoseYY61
01-701-1287Xanomeline High DoseYY56
+

Note: Trimmed for space

+

Notice how the columns returned are different. First off, within the +summary above, we pulled results from the descriptive statistics layer. +The target variable for this layer was AGE, and as such +AGE is returned in the resulting output. Additionally, a +layer level where argument was used to subset to +EFFFL == "Y", which leads to EFFFL being +included in the output as well.

+
+
+

Extracting a Result Cell’s Metadata +

+

To extract the dataframe in get_meta_subset(), the +metadata of the result cell needs to first be extracted. This metadata +can be directly accessed using the function +get_meta_result(). Using the last example of +get_meta_subset() above:

+
+get_meta_result(t, 'd1_2', 'var1_Xanomeline High Dose')
+#> tplyr_meta: 4 names, 3 filters
+#> Names:
+#>     TRT01P, EFFFL, SAFFL, AGE 
+#> Filters:
+#>     TRT01P == c("Xanomeline High Dose"), EFFFL == "Y", SAFFL == "Y"
+

The resulting output is a new object in Tplyr called tplyr_meta(). This is a container of the relevant metadata for a specific result. The object itself is a list with two elements: names and filters.

+

The names element contains quosures for each variable +relevant to a specific result. This will include the target variable, +the by variables used on the layer, the cols +variables used on the table, and all variables included in any filter +condition relevant to create the result.

+

The filters element contains each filter condition +(provided as calls) necessary to create a particular cell. This will +include the table level where argument, the layer level +where argument, the filter condition for the specific value +of any by variable or cols variable necessary +to create the cell, and similarly the filter for the treatment group of +interest.

+

The results are provided this way so that they can be unpacked directly into dplyr syntax when necessary, which is exactly what happens in get_meta_subset(). For example:

+
+m <- get_meta_result(t, 'd1_2', 'var1_Xanomeline High Dose')
+
+adsl %>% 
+  filter(!!!m$filters) %>% 
+  select(!!!m$names) %>% 
+  head(10) %>% 
+  kable()
TRT01P                EFFFL   SAFFL   AGE
Xanomeline High Dose  Y       Y       71
Xanomeline High Dose  Y       Y       77
Xanomeline High Dose  Y       Y       81
Xanomeline High Dose  Y       Y       75
Xanomeline High Dose  Y       Y       57
Xanomeline High Dose  Y       Y       56
Xanomeline High Dose  Y       Y       79
Xanomeline High Dose  Y       Y       56
Xanomeline High Dose  Y       Y       61
Xanomeline High Dose  Y       Y       56
+

Note: Trimmed for space

+

But - who says you can’t let your imagination run wild?

+
+cat(c("adsl %>%\n",
+  "   filter(\n      ",
+  paste(purrr::map_chr(m$filters, ~ rlang::as_label(.)), collapse=",\n      "),
+  ") %>%\n",
+  paste("   select(", paste(purrr::map_chr(m$names, rlang::as_label), collapse=", "), ")", sep="")
+))
+
adsl %>%
+    filter(
+       TRT01P == c("Xanomeline High Dose"),
+      EFFFL == "Y",
+      SAFFL == "Y" ) %>%
+    select(TRT01P, EFFFL, SAFFL, AGE)
+
+
+

So, What Does This Get Me? +

+

So we can get metadata about a result cell, and we can get the exact source data behind a result cell. You just need a row ID and a column name. But - what does that get you? You can query your tables - and that’s great. But how do you use that?

+

The idea behind this is really to support Shiny. Consider this minimal +application. Click any of the result cells within the table and see what +happens.

+ +

Source code available here

+

That’s what this is all about. The persistent row_id and column selection enables you to use something like Shiny to automatically query a cell based on its position in a table. Using click events and a package like reactable, you can pick up the row and column selected and pass that information into get_meta_subset(). Once you get the resulting data frame, it’s up to you what you do with it, and you have the world of Shiny at your fingertips.
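To make the mechanics concrete, here’s a deliberately minimal sketch - not the application linked above. The row_id and column are chosen through plain inputs rather than click events, but the get_meta_subset() call is the same one a click handler would make. The build(metadata = TRUE) step and the input wiring are illustrative assumptions.

# Minimal sketch, assuming `t` is the tplyr_table built earlier in this vignette
library(shiny)
library(Tplyr)

dat <- build(t, metadata = TRUE)  # builds the table and attaches row_id

ui <- fluidPage(
  selectInput("row_id", "Row ID", choices = dat$row_id),
  selectInput("column", "Column",
              choices = names(dat)[startsWith(names(dat), "var1_")]),
  tableOutput("source_data")
)

server <- function(input, output, session) {
  # The same call a reactable click handler would make with the
  # selected row_id and column name
  output$source_data <- renderTable({
    get_meta_subset(t, input$row_id, input$column)
  })
}

shinyApp(ui, server)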

+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/docs/articles/options.html b/docs/articles/options.html index ed326c67..201aa363 100644 --- a/docs/articles/options.html +++ b/docs/articles/options.html @@ -4,7 +4,8 @@ - + + Tplyr Options • Tplyr @@ -12,14 +13,13 @@ - - - + + + - - + - + @@ -35,115 +35,96 @@ gtag('config', 'UA-165685385-2'); - -
-
-
- -
-

-Scientific Notation

-

By default, R will switch to scientific notation for a number less than .001. This is controlled by the scipen option. The default value of scipen is 0. If you’d like to increase the decimal places required for scientific notation to be triggered, increase the value of scipen. The value of scipen is the number of orders of ten smaller (i.e. decimal places preceded by 0’s) required to switch to scientific notation. Decreasing the value of scipen will cause R to switch to scientific location for larger numbers.

+
+

Scientific Notation +

+

By default, R will switch to scientific notation for a number less than .001. This is controlled by the scipen option. The default value of scipen is 0. If you’d like to increase the decimal places required for scientific notation to be triggered, increase the value of scipen. The value of scipen is the number of orders of ten smaller (i.e. decimal places preceded by 0’s) required to switch to scientific notation. Decreasing the value of scipen will cause R to switch to scientific notation for larger numbers.

This is easier to understand with an example.

-options(scipen = 0) # This is the default
-.0001
-#> [1] 1e-04
-
-options(scipen = 1) # Require 5 decimal places instead
-
-.0001
-#> [1] 0.0001
-.00001
-#> [1] 1e-05
-
-options(scipen = -1) # Only require 3 decimal places
-.001
-#> [1] 1e-03
-

In ‘Tplyr’, we have the option tplyr.scipen. This is the scipen setting that will be used only while the ‘Tplyr’ table is being built. This allows you to use a different scipen setting within ‘Tplyr’ than your R session. The default value we use in ‘Tplyr’ is 9999, which is intended to totally prevent numbers from switching to scientific notation. We want this to be a conscious decision that you make in order to prevent any unexpected outputs.

+options(scipen = 0) # This is the default +.0001 +#> [1] 1e-04 + +options(scipen = 1) # Require 5 decimal places instead + +.0001 +#> [1] 0.0001 +.00001 +#> [1] 1e-05 + +options(scipen = -1) # Only require 3 decimal places +.001 +#> [1] 1e-03
+

In ‘Tplyr’, we have the option tplyr.scipen. This is the +scipen setting that will be used only while the +‘Tplyr’ table is being built. This allows you to use a different +scipen setting within ‘Tplyr’ than your R session. The +default value we use in ‘Tplyr’ is 9999, which is intended to totally +prevent numbers from switching to scientific notation. We want this to +be a conscious decision that you make in order to prevent any unexpected +outputs.

-options(tplyr.scipen = -3)
-t <- tplyr_table(adae, TRTA) %>% 
-  add_layer(
-    group_count(AEDECOD) %>% 
-      add_risk_diff(c('Xanomeline Low Dose', 'Placebo'))
-  )
-
-suppressWarnings(build(t)) %>% # Chi-squared warnings occur with small samples
-  head() %>% 
-  kable()
+options(tplyr.scipen = -3) +t <- tplyr_table(adae, TRTA) %>% + add_layer( + group_count(AEDECOD) %>% + add_risk_diff(c('Xanomeline Low Dose', 'Placebo')) + ) + +suppressWarnings(build(t)) %>% # Chi-squared warnings occur with small samples + head() %>% + kable()
@@ -680,24 +756,35 @@

-

Note that the risk-difference variables above have mostly shifted to scientific notation. This is because the limit has been shifted to .1 within the ‘Tplyr’ build.

+

Note that the risk-difference variables above have mostly shifted to +scientific notation. This is because the limit has been shifted to .1 +within the ‘Tplyr’ build.

-
-

-Quantile Algorithms

-

There are many algorithms available to compute quantile R has 9, controlled by the type parameter in quantile(). The descriptive statistics offer built-in summaries for Q1, Q3, and Interquartile range, all three of which use the quantile() in the underlying implementation. Given that we offer this default, we felt it was important to offer you the flexibility to change the algorithm. You can do this with tplyr.quantile_type.

+
+

Quantile Algorithms +

+

There are many algorithms available to compute quantiles; R has 9, controlled by the type parameter in quantile(). The descriptive statistics layers offer built-in summaries for Q1, Q3, and interquartile range, all three of which use quantile() in the underlying implementation. Given that we offer this default, we felt it was important to offer you the flexibility to change the algorithm. You can do this with tplyr.quantile_type.

The default we chose to use is the R default of Type 7:

\[ -m = 1-p. p[k] = (k - 1) / (n - 1). \textrm{In this case, } p[k] = mode[F(x[k])]. \textrm{This is used by S.} -\] The example below demonstrates using the default quantile algorithm in R

+m = 1-p. p[k] = (k - 1) / (n - 1). \textrm{In this case, } p[k] = +mode[F(x[k])]. \textrm{This is used by S.} +\] The example below demonstrates using the default quantile +algorithm in R

-tplyr_table(adsl, TRT01P) %>% 
-  add_layer(
-    group_desc(CUMDOSE) %>% 
-      set_format_strings("Q1, Q3" = f_str('xxxxx, xxxxx', q1, q3))
-  ) %>% 
-  build() %>%
-  kable()
+tplyr_table(adsl, TRT01P) %>% + add_layer( + group_desc(CUMDOSE) %>% + set_format_strings("Q1, Q3" = f_str('xxxxx, xxxxx', q1, q3)) + ) %>% + build() %>% + kable()
@@ -724,22 +811,30 @@

1
-

Within the clinical world, you may wish to match the way that SAS calculates quantiles. To match SAS’s definition, use Type 3:

+

Within the clinical world, you may wish to match the way that SAS +calculates quantiles. To match SAS’s definition, use Type 3:

\[ -\textrm{Nearest even order statistic. γ = 0 if g = 0 and j is even, and 1 otherwise.} +\textrm{Nearest even order statistic. γ = 0 if g = 0 and j is even, and +1 otherwise.} \]

-options(tplyr.quantile_type = 3)
-
-tplyr_table(adsl, TRT01P) %>% 
-  add_layer(
-    group_desc(CUMDOSE) %>% 
-      set_format_strings("Q1, Q3" = f_str('xxxxx, xxxxx', q1, q3))
-  ) %>% 
-  build() %>% 
-    select(-starts_with("ord")) %>% 
-  kable()
+options(tplyr.quantile_type = 3) + +tplyr_table(adsl, TRT01P) %>% + add_layer( + group_desc(CUMDOSE) %>% + set_format_strings("Q1, Q3" = f_str('xxxxx, xxxxx', q1, q3)) + ) %>% + build() %>% + select(-starts_with("ord")) %>% + kable()
++++++ @@ -754,68 +849,68 @@

row_label1 var1_Placebo
-
-

-IBM Rounding

-

In certain cases users may want to match tables produced by other languages that IBM rounding. Tplyr offers the option ‘tplyr.IBMRounding’ to change the default rounding behavior of Tplyr tables. Review var1_4 in the tables below.

+
+

IBM Rounding +

+

In certain cases users may want to match tables produced by other languages that use IBM rounding. Tplyr offers the option ‘tplyr.IBMRounding’ to change the default rounding behavior of Tplyr tables. Review var1_4 in the tables below.

Using the default R behavior

-tplyr_table(mtcars, gear) %>%
-  add_layer(
-    group_desc(qsec) %>%
-      set_format_strings(mean = f_str("xx.xx", mean))
-  ) %>%
-  build()
-#> # A tibble: 1 × 6
-#>   row_label1 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1
-#>   <chr>      <chr>  <chr>  <chr>            <int>       <int>
-#> 1 mean       17.69  18.96  15.64                1           1
+tplyr_table(mtcars, gear) %>% + add_layer( + group_desc(qsec) %>% + set_format_strings(mean = f_str("xx.xx", mean)) + ) %>% + build() +#> # A tibble: 1 × 6 +#> row_label1 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1 +#> <chr> <chr> <chr> <chr> <int> <int> +#> 1 mean 17.69 18.96 15.64 1 1

Using IBM rounding

-withr::with_options(
-  list(tplyr.IBMRounding = TRUE),
-  {
-    tplyr_table(mtcars, gear) %>%
-      add_layer(
-        group_desc(qsec) %>%
-          set_format_strings(mean = f_str("xx.xx", mean))
-      ) %>%
-      build()
-  }
-)
-#> Warning: You have enabled IBM Rounding. This is an experimental feature.
-#> *  If you have feedback please get in touch with the maintainers!
-#> This warning is displayed once every 8 hours.
-#> # A tibble: 1 × 6
-#>   row_label1 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1
-#>   <chr>      <chr>  <chr>  <chr>            <int>       <int>
-#> 1 mean       17.69  18.97  15.64                1           1
-
-
- - - + -
+ diff --git a/docs/articles/readme.html b/docs/articles/readme.html index a3ebabf2..7e419963 100644 --- a/docs/articles/readme.html +++ b/docs/articles/readme.html @@ -206,7 +206,7 @@

) %>% build() #> # A tibble: 9 × 8 -#> row_label1 row_label2 var1_Placebo `var1_Xanomelin… `var1_Xanomelin… +#> row_label1 row_label2 var1_Placebo `var1_Xanomeli…` `var1_Xanomeli…` #> <chr> <chr> <chr> <chr> <chr> #> 1 Age (years) n " 86" " 84" " 84" #> 2 Age (years) Mean (SD) "75.2 ( 8.5… "74.4 ( 7.89)" "75.7 ( 8.29)" diff --git a/docs/articles/riskdiff.html b/docs/articles/riskdiff.html index b5457adc..10628579 100644 --- a/docs/articles/riskdiff.html +++ b/docs/articles/riskdiff.html @@ -4,7 +4,8 @@ - + + Risk Difference • Tplyr @@ -12,14 +13,13 @@ - - - + + + - - + - + @@ -35,135 +35,123 @@ gtag('config', 'UA-165685385-2'); - -
-
-
- -
+ diff --git a/docs/articles/shift.html b/docs/articles/shift.html index a5b1acfa..91b86faa 100644 --- a/docs/articles/shift.html +++ b/docs/articles/shift.html @@ -4,7 +4,8 @@ - + + Shift Layers • Tplyr @@ -12,14 +13,13 @@ - - - + + + - - + - + @@ -35,138 +35,140 @@ gtag('config', 'UA-165685385-2'); - -
-
-
- + diff --git a/docs/articles/sort.html b/docs/articles/sort.html index 527f700a..56bdc085 100644 --- a/docs/articles/sort.html +++ b/docs/articles/sort.html @@ -4,7 +4,8 @@ - + + Sorting a Tplyr Table • Tplyr @@ -12,14 +13,13 @@ - - - + + + - - + - + @@ -35,128 +35,108 @@ gtag('config', 'UA-165685385-2'); - -
-
-
- -
-

-Reordering and Dropping Columns

-

Column selection from data frames is something that is already very well done in R. The functions dplyr::select(), magrittr::extract(), and [ can all be used to reorder and drop column cleanly and concisely based on a user’s preference.

-

To drop the ordering helpers, you can easily subtract them with ‘dplyr’ and ‘tidyselect’.

+
+

Reordering and Dropping Columns +

+

Column selection from data frames is something that is already very well done in R. The functions dplyr::select(), magrittr::extract(), and [ can all be used to reorder and drop columns cleanly and concisely based on a user’s preference.

+

To drop the ordering helpers, you can easily subtract them with +‘dplyr’ and ‘tidyselect’.

-t %>% 
-  select(-starts_with("ord_")) %>% 
-  kable()
- +t %>% + select(-starts_with("ord_")) %>% + kable() +
@@ -675,12 +668,13 @@

-

Or you can reorder columns. In this example the “Total” result column is moved to the front of the results.

+

Or you can reorder columns. In this example the “Total” result column +is moved to the front of the results.

-t %>%
-  select( starts_with("row"), var1_Total, starts_with("var1")) %>% 
-  kable()
- +t %>% + select( starts_with("row"), var1_Total, starts_with("var1")) %>% + kable() +
@@ -846,18 +840,26 @@

-

For more information, it’s well worth your time to familiarize yourself with the select helpers that work with ‘dplyr’.

+

For more information, it’s well worth your time to familiarize +yourself with the select +helpers that work with ‘dplyr’.

-
-

-Sorting the Layers

-

Layers are one of the fundamental building blocks of ‘Tplyr’. Each layer executes independently, and at the end of a build they’re bound together. The ord_layer_index variable allows you differentiate and sort layers after the table is built. Layers are indexed in the order in which they were added to the table using add_layer() or add_layers(). For example, let’s say you wanted to reverse the order of the layers.

+
+

Sorting the Layers +

+

Layers are one of the fundamental building blocks of ‘Tplyr’. Each layer executes independently, and at the end of a build they’re bound together. The ord_layer_index variable allows you to differentiate and sort layers after the table is built. Layers are indexed in the order in which they were added to the table using add_layer() or add_layers(). For example, let’s say you wanted to reverse the order of the layers.

-t %>%
-  select(starts_with("row"), starts_with("ord")) %>%
-  arrange(desc(ord_layer_index)) %>% 
-  kable()
+t %>% + select(starts_with("row"), starts_with("ord")) %>% + arrange(desc(ord_layer_index)) %>% + kable()
@@ -982,29 +984,45 @@

row_label1
-
-

-Sorting the by Variables

-

Each by variable gets its own order column as well. These will be named ord_layer_<n> where <n> typically relates back to the row_label variable (this isn’t necessarily the case when count layers are nested - see vignette("count")).

-

These order variables will calculate based on the first applicable method below.

+
+

Sorting the by Variables +

+

Each by variable gets its own order column as well. +These will be named ord_layer_<n> where +<n> typically relates back to the +row_label variable (this isn’t necessarily the case when +count layers are nested - see vignette("count")).

+

These order variables will calculate based on the first applicable +method below.

    -
  1. If the by variable is a factor, the values of the ordering column will be associated with the factor levels.
  2. -
  3. If the variable has a VARN variable in the target dataset, (i.e. AVISIT has AVISITN, or PARAM has PARAMN), that variable will be extracted and used as the ordering variable associated with that row label.
  4. -
  5. If neither 1 or 2 are true, the values in the ordering column will be based on an alphabetical sorting. The resulting column will be numeric.
  6. +
  7. If the by variable is a factor, the values of the +ordering column will be associated with the factor levels.
  8. +
  9. If the variable has a VARN variable in the +target dataset, (i.e. AVISIT has +AVISITN, or PARAM has PARAMN), +that variable will be extracted and used as the ordering variable +associated with that row label.
  10. +
  11. If neither 1 or 2 are true, the values in the ordering column will +be based on an alphabetical sorting. The resulting column will be +numeric.
-
-

-Factor

-

If there’s no VARN variable in the target dataset, ‘Tplyr’ will then check if the variable you provided is a factor. If you’re new to R, spending some time trying to understand factor variables is quite worthwhile. Let’s look at example using the variable ETHNIC and see some of the advantages in practice.

+
+

Factor +

+

If there’s no VARN variable in the target dataset, ‘Tplyr’ will then check if the variable you provided is a factor. If you’re new to R, spending some time trying to understand factor variables is quite worthwhile. Let’s look at an example using the variable ETHNIC and see some of the advantages in practice.

-adsl$ETHNIC <- factor(adsl$ETHNIC, levels=c("HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", "DUMMMY"))
-tplyr_table(adsl, TRT01A) %>%
-  add_layer(
-    group_count(EOSSTT, by = ETHNIC)
-  ) %>%
-  build() %>%
-  select(row_label1, row_label2, ord_layer_1) %>%
-  kable()
+adsl$ETHNIC <- factor(adsl$ETHNIC, levels=c("HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", "DUMMMY")) +tplyr_table(adsl, TRT01A) %>% + add_layer( + group_count(EOSSTT, by = ETHNIC) + ) %>% + build() %>% + select(row_label1, row_label2, ord_layer_1) %>% + kable()
@@ -1044,19 +1062,36 @@

row_label1
-

Factor variables have ‘levels’. These levels are essentially what the VARN variables are trying to achieve - they specify the order of the different values within the associated variable. The variable we set above specifies that “HISPANIC OR LATINO” should sort first, then “NOT HISPANIC OR LATINO”, and finally “DUMMY”. Notice how they’re not alphabetical?

-

A highly advantageous aspect of using factor variables in ‘Tplyr’ is that factor variables can be used to insert dummy values into your table. Consider this line of code from above:

-
adsl$ETHNIC <- factor(adsl$ETHNIC, levels=c("HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", "DUMMMY"))
-

This is converting the variable ETHNIC to a factor, then setting the factor levels. But it doesn’t change any of the values in the dataset - there are no values of “dummy” within ETHNIC in ADSL. Yet in the output built above, you see rows for “DUMMY”. By using factors, you can insert rows into your ‘Tplyr’ table that don’t exist in the data. This is particularly helpful if you’re working with data early on in a study, where certain values are expected, yet do not currently exist in the data. This will help you prepare tables that are complete even when your data are not.

+

Factor variables have ‘levels’. These levels are essentially what the +VARN variables are trying to achieve - they specify the +order of the different values within the associated variable. The +variable we set above specifies that “HISPANIC OR LATINO” should sort +first, then “NOT HISPANIC OR LATINO”, and finally “DUMMY”. Notice how +they’re not alphabetical?

+

A highly advantageous aspect of using factor variables in ‘Tplyr’ is +that factor variables can be used to insert dummy values into your +table. Consider this line of code from above:

+
adsl$ETHNIC <- factor(adsl$ETHNIC, levels=c("HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", "DUMMMY"))
+

This is converting the variable ETHNIC to a factor, then +setting the factor levels. But it doesn’t change any of the +values in the dataset - there are no values of “dummy” within +ETHNIC in ADSL. Yet in the output built above, you see rows +for “DUMMY”. By using factors, you can insert rows into your ‘Tplyr’ +table that don’t exist in the data. This is particularly helpful if +you’re working with data early on in a study, where certain values are +expected, yet do not currently exist in the data. This will help you +prepare tables that are complete even when your data are not.

-
-

-VARN

-

To demonstrate the use of VARN sorting, consider the variable RACE. In ADSL, RACE also has RACEN:

+
+

VARN +

+

To demonstrate the use of VARN sorting, consider the +variable RACE. In ADSL, RACE also +has RACEN:

-adsl %>% 
-  distinct(RACEN, RACE) %>% 
-  kable()
+adsl %>% + distinct(RACEN, RACE) %>% + kable()
@@ -1077,16 +1112,18 @@

RACE
-

‘Tplyr’ will automatically figure this out for you, and pull the RACEN values into the variable ord_layer_1.

+

‘Tplyr’ will automatically figure this out for you, and pull the +RACEN values into the variable +ord_layer_1.

-tplyr_table(adsl, TRT01A) %>%
-  add_layer(
-    group_count(EOSSTT, by = RACE)
-  ) %>%
-  build() %>%
-  select(row_label1, row_label2, ord_layer_1) %>%
-  arrange(ord_layer_1) %>% 
-  kable()
+tplyr_table(adsl, TRT01A) %>% + add_layer( + group_count(EOSSTT, by = RACE) + ) %>% + build() %>% + select(row_label1, row_label2, ord_layer_1) %>% + arrange(ord_layer_1) %>% + kable()
@@ -1127,29 +1164,38 @@

row_label1
-
-

-Alphabetical

-

Lastly, If the target doesn’t have a VARN variable in the target dataset and isn’t a factor, ‘Tplyr’ will sort the variable alphabetically. The resulting order variable will be numeric, simply numbering each of the variable values alphabetically. Nothing fancy to it!

+
+

Alphabetical +

+

Lastly, if the target doesn’t have a VARN variable in the target dataset and isn’t a factor, ‘Tplyr’ will sort the variable alphabetically. The resulting order variable will be numeric, simply numbering each of the variable values alphabetically. Nothing fancy to it! A quick sketch of this case is shown below.
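As an illustration of that fall-through behavior, the sketch below uses DCDECOD as a by variable. DCDECOD is assumed here to have no corresponding numeric variable (e.g. a DCDECODN) in ADSL and to not be a factor, so its order column is just an alphabetical numbering.

tplyr_table(adsl, TRT01A) %>%
  add_layer(
    # DCDECOD: assumed to have no VARN counterpart and to not be a factor, so
    # ord_layer_1 simply numbers the discontinuation reasons alphabetically
    group_count(EOSSTT, by = DCDECOD)
  ) %>%
  build() %>%
  select(row_label1, row_label2, ord_layer_1) %>%
  kable()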

-
-

-Sorting Descriptive Statistic Summaries

-

After the by variables, each layer will sort results slightly differently. We’ll start with the most simple case - descriptive statistic layers. As the user, you have full control over the order in which results present using set_format_strings(). Results will be ordered based on the order in which you create your f_str() objects.

+
+

Sorting Descriptive Statistic Summaries +

+

After the by variables, each layer will sort results +slightly differently. We’ll start with the most simple case - +descriptive statistic layers. As the user, you have full control over +the order in which results present using +set_format_strings(). Results will be ordered based on the +order in which you create your f_str() objects.

-tplyr_table(adsl, TRT01A) %>%
-  add_layer(
-    group_desc(HEIGHTBL) %>% 
-      set_format_strings(
-        'Group 1' = f_str('xx.x', mean),
-        'Group 2' = f_str('xx.x', median),
-        'Group 3' = f_str('xx.x', sd)
-      )
-  ) %>% 
-  build() %>% 
-  select(starts_with("row"), starts_with("ord")) %>% 
-  kable()
+tplyr_table(adsl, TRT01A) %>% + add_layer( + group_desc(HEIGHTBL) %>% + set_format_strings( + 'Group 1' = f_str('xx.x', mean), + 'Group 2' = f_str('xx.x', median), + 'Group 3' = f_str('xx.x', sd) + ) + ) %>% + build() %>% + select(starts_with("row"), starts_with("ord")) %>% + kable()
@@ -1174,38 +1220,59 @@

row_label1
-

Each of the separate “Groups” added above were indexed based on their position in set_format_strings(). If you’d like to change the order, all you need to do is update your set_format_strings() call.

+

Each of the separate “Groups” added above was indexed based on its position in set_format_strings(). If you’d like to change the order, all you need to do is update your set_format_strings() call, as in the sketch below.
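For instance, a sketch reordering the layer above - the standard deviation row now gets the first order value simply because it’s listed first:

tplyr_table(adsl, TRT01A) %>%
  add_layer(
    group_desc(HEIGHTBL) %>%
      set_format_strings(
        # Same summaries as above, just listed in a different order
        'Group 3' = f_str('xx.x', sd),
        'Group 1' = f_str('xx.x', mean),
        'Group 2' = f_str('xx.x', median)
      )
  ) %>%
  build() %>%
  select(starts_with("row"), starts_with("ord")) %>%
  kable()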

-
-

-Sorting Count Layers

-

The order in which results appear on a frequency table can be deceptively complex and depends on the situation at hand. With this in mind, ‘Tplyr’ has 3 different methods of ordering the results of a count layer using the function set_order_count_method():

+
+

Sorting Count Layers +

+

The order in which results appear on a frequency table can be +deceptively complex and depends on the situation at hand. With this in +mind, ‘Tplyr’ has 3 different methods of ordering the results of a count +layer using the function set_order_count_method():

  1. -“byfactor” - The default method is to sort by a factor. If the input variable is not a factor, alphabetical sorting will be used.
  2. +“byfactor” - The default method is to sort by a +factor. If the input variable is not a factor, alphabetical sorting will +be used.
  3. -“byvarn” - Similar to a ‘by’ variable, a count target can be sorted with a VARN variable existing in the target dataset.
  4. +“byvarn” - Similar to a ‘by’ variable, a count +target can be sorted with a VARN variable existing in the target +dataset.
  5. -“bycount” - This is the most complex method. Many tables require counts to be sorted based on the counts within a particular group, like a treatment variable. ‘Tplyr’ can populate the ordering column based on numeric values within any results column. This requires some more granular control, for which we’ve created the functions set_ordering_cols() and set_result_order_var() to specify the column and numeric value on which the ordering column should be based.
  6. +“bycount” - This is the most complex method. Many +tables require counts to be sorted based on the counts within a +particular group, like a treatment variable. ‘Tplyr’ can populate the +ordering column based on numeric values within any results column. This +requires some more granular control, for which we’ve created the +functions set_ordering_cols() and +set_result_order_var() to specify the column and numeric +value on which the ordering column should be based.
-
-

-“byfactor” and “byvarn”

-

“byfactor” is the default ordering method of results for count layers. Both “byfactor” and “byvarn” behave exactly like the order variables associated with by variables in a ‘Tplyr’ table. For “byvarn”, you must set the sort method using set_order_count_method().

+
+

“byfactor” and “byvarn” +

+

“byfactor” is the default ordering method of results for count +layers. Both “byfactor” and “byvarn” behave exactly like the order +variables associated with by variables in a ‘Tplyr’ table. +For “byvarn”, you must set the sort method using +set_order_count_method().

-adsl$AGEGR1 <- factor(adsl$AGEGR1, c("<65", "65-80", ">80"))
-# Warnings suppressed to remove 'forcats' implicit NA warning
-suppressWarnings({
-  tplyr_table(adsl, TRT01A) %>%
-    add_layer(
-      group_count(AGEGR1) %>%
-        # This is the default and not needed
-        set_order_count_method("byfactor")
-    ) %>% 
-    build() %>%
-    select(row_label1, ord_layer_1) %>%
-    kable()
-})
+adsl$AGEGR1 <- factor(adsl$AGEGR1, c("<65", "65-80", ">80")) +# Warnings suppressed to remove 'forcats' implicit NA warning +suppressWarnings({ + tplyr_table(adsl, TRT01A) %>% + add_layer( + group_count(AGEGR1) %>% + # This is the default and not needed + set_order_count_method("byfactor") + ) %>% + build() %>% + select(row_label1, ord_layer_1) %>% + kable() +})
@@ -1227,14 +1294,14 @@

row_label1
-tplyr_table(adsl, TRT01A) %>%
-  add_layer(
-    group_count(RACE) %>%
-      set_order_count_method("byvarn")
-  ) %>%
-  build() %>%
-  select(row_label1, ord_layer_1) %>%
-  kable()
+tplyr_table(adsl, TRT01A) %>% + add_layer( + group_count(RACE) %>% + set_order_count_method("byvarn") + ) %>% + build() %>% + select(row_label1, ord_layer_1) %>% + kable()
@@ -1256,35 +1323,42 @@

row_label1
-
-

-“bycount”

-

Using count-based sorting is where things get more complicated. There are multiple items to consider:

+
+

“bycount” +

+

Using count-based sorting is where things get more complicated. There +are multiple items to consider:

  • What column do you want to sort by?
  • -
  • If there are multiple numbers in the column, like “n (%) [event]” type tables, which number should be used to create the sort variable?
  • +
  • If there are multiple numbers in the column, like “n (%) [event]” +type tables, which number should be used to create the sort +variable?
-

We’ve created helper functions to aid in making this step more intuitive from a user perspective, and to maintain the flexibility that you need. The two functions that you need here are set_ordering_cols() and set_result_order_var().

+

We’ve created helper functions to aid in making this step more +intuitive from a user perspective, and to maintain the flexibility that +you need. The two functions that you need here are +set_ordering_cols() and +set_result_order_var().

-tplyr_table(adae, TRTA) %>%
-  add_layer(
-    group_count(AEDECOD) %>% 
-      # This will present 3 numbers in a cell
-      set_format_strings(f_str("xx (xx.x%) [x]", distinct_n, distinct_pct, n)) %>% 
-      # This makes the distinct numbers available
-      set_distinct_by(USUBJID) %>%
-      # Choosing "bycount" ordering for the result variable
-      set_order_count_method("bycount") %>%
-      # This will target the results column for Xanomeline High Dose, or `var1_Xanomeline High Dose`
-      set_ordering_cols("Xanomeline High Dose") %>% 
-      # The number we want to pull out is the distinct N counts
-      set_result_order_var(distinct_n)
-  ) %>% 
-  build() %>% 
-  arrange(desc(ord_layer_1)) %>% 
-  select(row_label1, `var1_Xanomeline High Dose`, ord_layer_1) %>% 
-  head() %>% 
-  kable()
+tplyr_table(adae, TRTA) %>% + add_layer( + group_count(AEDECOD) %>% + # This will present 3 numbers in a cell + set_format_strings(f_str("xx (xx.x%) [x]", distinct_n, distinct_pct, n)) %>% + # This makes the distinct numbers available + set_distinct_by(USUBJID) %>% + # Choosing "bycount" ordering for the result variable + set_order_count_method("bycount") %>% + # This will target the results column for Xanomeline High Dose, or `var1_Xanomeline High Dose` + set_ordering_cols("Xanomeline High Dose") %>% + # The number we want to pull out is the distinct N counts + set_result_order_var(distinct_n) + ) %>% + build() %>% + arrange(desc(ord_layer_1)) %>% + select(row_label1, `var1_Xanomeline High Dose`, ord_layer_1) %>% + head() %>% + kable()
@@ -1294,59 +1368,67 @@

- + - + - + - + - + - +
row_label1
PRURITUS26 (61.9%) [38]26 ( 4.8%) [38] 26
ERYTHEMA14 (33.3%) [22]14 ( 2.6%) [22] 14
RASH11 (26.2%) [18]11 ( 2.0%) [18] 11
HYPERHIDROSIS8 (19.0%) [10]8 ( 1.5%) [10] 8
SKIN IRRITATION5 (11.9%) [8]5 ( 0.9%) [8] 5
RASH PRURITIC2 ( 4.8%) [3]2 ( 0.4%) [3] 2
-

In the above example, the results columns of the output table actually contain three different numbers: the distinct counts, the distinct percentage, and the non-distinct counts. We want to use distinct counts, so we choose distinct_n.

-

The next question that we need to answer when sorting by counts is which result column to take counts out of. Here, we have three results columns - one for each treatment group in the dataset. We want to use the results for the treatment group “Xanomeline High Dose”, so we provide the name of the treatment group.

-

But what if you have an additional column variable on top of the treatment groups?

+

In the above example, the results columns of the output table +actually contain three different numbers: the distinct counts, the +distinct percentage, and the non-distinct counts. We want to use +distinct counts, so we choose distinct_n.

+

The next question that we need to answer when sorting by counts is +which result column to take counts out of. Here, we have three results +columns - one for each treatment group in the dataset. We want to use +the results for the treatment group “Xanomeline High Dose”, so we +provide the name of the treatment group.

+

But what if you have an additional column variable on top of the +treatment groups?

-tplyr_table(adae, TRTA, cols=SEX) %>%
-  add_layer(
-    group_count(AEDECOD) %>% 
-      # This will present 3 numbers in a cell
-      set_format_strings(f_str("xx (xx.x%) [x]", distinct_n, distinct_pct, n)) %>% 
-      # This makes the distinct numbers available
-      set_distinct_by(USUBJID) %>%
-      # Choosing "bycount" ordering for the result variable
-      set_order_count_method("bycount") %>%
-      # This will target the results column for Xanomeline High Dose, or `var1_Xanomeline High Dose`
-      set_ordering_cols("Xanomeline High Dose", "F") %>% 
-      # The number we want to pull out is the distinct N counts
-      set_result_order_var(distinct_n)
-  ) %>% 
-  build() %>% 
-  arrange(desc(ord_layer_1)) %>% 
-  select(row_label1, `var1_Xanomeline High Dose_F`, ord_layer_1) %>% 
-  head() %>% 
-  kable()
+tplyr_table(adae, TRTA, cols=SEX) %>% + add_layer( + group_count(AEDECOD) %>% + # This will present 3 numbers in a cell + set_format_strings(f_str("xx (xx.x%) [x]", distinct_n, distinct_pct, n)) %>% + # This makes the distinct numbers available + set_distinct_by(USUBJID) %>% + # Choosing "bycount" ordering for the result variable + set_order_count_method("bycount") %>% + # This will target the results column for Xanomeline High Dose, or `var1_Xanomeline High Dose` + set_ordering_cols("Xanomeline High Dose", "F") %>% + # The number we want to pull out is the distinct N counts + set_result_order_var(distinct_n) + ) %>% + build() %>% + arrange(desc(ord_layer_1)) %>% + select(row_label1, `var1_Xanomeline High Dose_F`, ord_layer_1) %>% + head() %>% + kable()
@@ -1356,52 +1438,59 @@

- + - + - + - + - + - +
row_label1
PRURITUS11 (78.6%) [14]11 ( 3.7%) [14] 11
ERYTHEMA7 (50.0%) [8]7 ( 2.4%) [8] 7
RASH3 (21.4%) [5]3 ( 1.0%) [5] 3
HYPERHIDROSIS2 (14.3%) [2]2 ( 0.7%) [2] 2
RASH PRURITIC1 ( 7.1%) [1]1 ( 0.3%) [1] 1
SKIN IRRITATION1 ( 7.1%) [2]1 ( 0.3%) [2] 1
-

Here we’re ordering on the female subjects in the “Xanomeline High Dose” cohort. In set_result_order_var(), you need to enter the values from each variable between treat_var and any variable entered in cols that you’d like to extract.

+

Here we’re ordering on the female subjects in the “Xanomeline High Dose” cohort. In set_ordering_cols(), you need to enter the values from each variable between treat_var and any variable entered in cols that you’d like to extract.

-
-

-Nested Sorting

-

Nested count layers add one more piece to the puzzle. As a reminder, nested count layers are count summaries that are summarizing both a grouping variable, and a variable that’s being grouped. The best example is probably Adverse Event tables, where we want to see adverse events that occurred within different body systems.

+
+

Nested Sorting +

+

Nested count layers add one more piece to the puzzle. As a reminder, +nested count layers are count summaries that are summarizing both a +grouping variable, and a variable that’s being grouped. The best example +is probably Adverse Event tables, where we want to see adverse events +that occurred within different body systems.

-tplyr_table(adae, TRTA) %>% 
-  add_layer(
-    group_count(vars(AEBODSYS, AEDECOD))
-  ) %>% 
-  build() %>% 
-  head() %>% 
-  kable()
- +tplyr_table(adae, TRTA) %>% + add_layer( + group_count(vars(AEBODSYS, AEDECOD)) + ) %>% + build() %>% + head() %>% + kable() +
@@ -1485,19 +1574,36 @@

-

In a layer that uses nesting, we need one more order variable - as we’re now concerned with the sorting of both the outside and inside variable. Counts are being summarized for both - so we need to know how both should be sorted. Additionally, we need to make sure that, in this case, the adverse events within a body system stay within the rows of that body system.

-

These result variables will always be the last two order variables output by ‘Tplyr’. In the above example, ord_layer_1 is for AEBODSYS and ord_layer_2 is for AEDECOD. Note that ord_layer_2 has Inf where row_label1 and row_label2 are both equal. This is the row that summarizes the AEBODSYS counts. By default, ‘Tplyr’ is set to assume that you will use descending sort on the order variable associated with the inside count variable (i.e. AEDECOD). This is because in nested count layer you will often want to sort by descending occurrence of the inside target variable. If you’d like to use ascending sorting instead, we offer the function set_outer_sort_position().

+

In a layer that uses nesting, we need one more order variable - as +we’re now concerned with the sorting of both the outside and inside +variable. Counts are being summarized for both - so we need to know how +both should be sorted. Additionally, we need to make sure that, in this +case, the adverse events within a body system stay within the rows of +that body system.

+

These result variables will always be the last two order variables output by ‘Tplyr’. In the above example, ord_layer_1 is for AEBODSYS and ord_layer_2 is for AEDECOD. Note that ord_layer_2 has Inf where row_label1 and row_label2 are equal. This is the row that summarizes the AEBODSYS counts. By default, ‘Tplyr’ is set to assume that you will use a descending sort on the order variable associated with the inside count variable (i.e. AEDECOD). This is because in a nested count layer you will often want to sort by descending occurrence of the inside target variable. If you’d like to use ascending sorting instead, we offer the function set_outer_sort_position().

-tplyr_table(adae, TRTA) %>% 
-  add_layer(
-    group_count(vars(AEBODSYS, AEDECOD)) %>% 
-      set_outer_sort_position("asc")
-  ) %>% 
-  build() %>% 
-  arrange(ord_layer_1, ord_layer_2) %>% 
-  select(starts_with("row"), starts_with("ord_layer")) %>% 
-  head() %>% 
-  kable()
+tplyr_table(adae, TRTA) %>% + add_layer( + group_count(vars(AEBODSYS, AEDECOD)) %>% + set_outer_sort_position("asc") + ) %>% + build() %>% + arrange(ord_layer_1, ord_layer_2) %>% + select(starts_with("row"), starts_with("ord_layer")) %>% + head() %>% + kable()
@@ -1558,18 +1664,33 @@

-

Notice that the Inf has now switched to -Inf to ensure that the AEBODSYS row stays at the top of the group.

-

Another consideration of nested sorting is whether or not you want to sort both result variables the same way. Do you want to sort both by counts? Or do you want to sort one alphabetically and the other by count? Or maybe one has a VARN variable associated with it? For this reason, set_order_count_method() can take in a 2-element character vector, where the first element specifies the outside variable and the second the inside variable.

+

Notice that the Inf has now switched to +-Inf to ensure that the AEBODSYS row stays at +the top of the group.

+

Another consideration of nested sorting is whether or not you want to +sort both result variables the same way. Do you want to sort both by +counts? Or do you want to sort one alphabetically and the other by +count? Or maybe one has a VARN variable associated with it? +For this reason, set_order_count_method() can take in a +2-element character vector, where the first element specifies the +outside variable and the second the inside variable.

-tplyr_table(adsl, TRT01A) %>%
-  add_layer(
-    group_count(vars(EOSSTT, DCDECOD)) %>%
-      set_order_count_method(c("byfactor", "bycount"))
-  ) %>%
-  build() %>%
-  select(starts_with("row"), starts_with("ord")) %>%
-  kable()
+tplyr_table(adsl, TRT01A) %>% + add_layer( + group_count(vars(EOSSTT, DCDECOD)) %>% + set_order_count_method(c("byfactor", "bycount")) + ) %>% + build() %>% + select(starts_with("row"), starts_with("ord")) %>% + kable()
+++++++ @@ -1657,21 +1778,33 @@

row_label1 row_label2
-

In the example above, EOSTT is ordered alphabetically (recall that using “byfactor” when the variable is not a factor will do alphabetical sorting), and DSDECOD is ordered by count.

-

If only one method is provided, that method will automatically be applied to both variables. So in the example below, “bycount” is applied to both EOSTT and DSDECOD.

+

In the example above, EOSSTT is ordered alphabetically (recall that using “byfactor” when the variable is not a factor will do alphabetical sorting), and DCDECOD is ordered by count.

+

If only one method is provided, that method will automatically be applied to both variables. So in the example below, “bycount” is applied to both EOSSTT and DCDECOD.

-tplyr_table(adsl, TRT01A) %>%
-  add_total_group() %>%
-  add_layer(
-    group_count(vars(EOSSTT, DCDECOD)) %>%
-      set_order_count_method("bycount") %>%
-      #set_order_count_method("bycount", "bycount") %>% This is functionally the same.
-      set_ordering_cols(Total)
-  ) %>%
-  build() %>%
-  select(starts_with("row"),  var1_Total, starts_with("ord")) %>%
-  kable()
+tplyr_table(adsl, TRT01A) %>% + add_total_group() %>% + add_layer( + group_count(vars(EOSSTT, DCDECOD)) %>% + set_order_count_method("bycount") %>% + #set_order_count_method("bycount", "bycount") %>% This is functionally the same. + set_ordering_cols(Total) + ) %>% + build() %>% + select(starts_with("row"), var1_Total, starts_with("ord")) %>% + kable()
++++++++ @@ -1772,19 +1905,22 @@

row_label1 row_label2
-
-

-Sorting Shift Tables

-

Shift tables keep things relatively simple when it comes to sorting and use the “byfactor” method seen above. We encourage this primarily because you likely want the benefits of factor variables on a shift layer. For example, consider this table:

+
+

Sorting Shift Tables +

+

Shift tables keep things relatively simple when it comes to sorting +and use the “byfactor” method seen above. We encourage this primarily +because you likely want the benefits of factor variables on a shift +layer. For example, consider this table:

-tplyr_table(adlb, TRTA, where=PARAMCD == "CK") %>%
-  add_layer(
-    group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, AVISIT))
-  ) %>%
-  build() %>%
-  select(-starts_with('var1')) %>% 
-  head(20) %>% 
-  kable()
+tplyr_table(adlb, TRTA, where=PARAMCD == "CK") %>% + add_layer( + group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, AVISIT)) + ) %>% + build() %>% + select(-starts_with('var1')) %>% + head(20) %>% + kable()
@@ -1864,21 +2000,22 @@

There are a few problems here:

  • “H” sorts before “N” alphabetically
  • -
  • We’re missing the rows for “L” on most visits, even though “L” in in the data for BNRIND.
  • +
  • We’re missing the rows for “L” on most visits, even though “L” is in the data for BNRIND.

Using factor variables cleans this right up for us:

-adlb$BNRIND <- factor(adlb$BNRIND, levels=c("L", "N", "H"))
-adlb$ANRIND <- factor(adlb$ANRIND, levels=c("L", "N", "H"))
-
-tplyr_table(adlb, TRTA, where=PARAMCD == "CK") %>%
-  add_layer(
-    group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, AVISIT))
-  ) %>%
-  build() %>%
-  select(-starts_with('var1')) %>% 
-  head(20) %>% 
-  kable()
+adlb$BNRIND <- factor(adlb$BNRIND, levels=c("L", "N", "H")) +adlb$ANRIND <- factor(adlb$ANRIND, levels=c("L", "N", "H")) + +tplyr_table(adlb, TRTA, where=PARAMCD == "CK") %>% + add_layer( + group_shift(vars(row = BNRIND, column = ANRIND), by = vars(PARAM, AVISIT)) + ) %>% + build() %>% + select(-starts_with('var1')) %>% + head(20) %>% + kable()

@@ -1982,34 +2119,34 @@

-

Now we have the nice “L”, “N”, “H” order that we’d like to see. Other sort methods on a shift table are fairly unlikely, as the matrix structure of the counts displayed by shift tables is relevant to the presentation and interpreting results.

+

Now we have the nice “L”, “N”, “H” order that we’d like to see. Other sort methods on a shift table are fairly unlikely, as the matrix structure of the counts displayed by shift tables is relevant to both the presentation and the interpretation of results.

Happy sorting!

-
- - - +
-
+ diff --git a/docs/articles/styled-table.html b/docs/articles/styled-table.html index a0e7cfcb..6b00b331 100644 --- a/docs/articles/styled-table.html +++ b/docs/articles/styled-table.html @@ -4,7 +4,8 @@ - + + Producing a Styled Table • Tplyr @@ -12,14 +13,13 @@ - - - + + + - - + - + @@ -35,118 +35,102 @@ gtag('config', 'UA-165685385-2'); - -
-
-
- -
+ diff --git a/docs/articles/table.html b/docs/articles/table.html index daf76836..2ba1a54e 100644 --- a/docs/articles/table.html +++ b/docs/articles/table.html @@ -4,7 +4,8 @@ - + + Tplyr Table Properties • Tplyr @@ -12,14 +13,13 @@ - - - + + + - - + - + @@ -35,139 +35,129 @@ gtag('config', 'UA-165685385-2'); - -
-
-
- -
+ diff --git a/docs/articles/tplyr.html b/docs/articles/tplyr.html index d813c74d..5da5060a 100644 --- a/docs/articles/tplyr.html +++ b/docs/articles/tplyr.html @@ -5,18 +5,35 @@ -tplyr • Tplyr +Getting Started • Tplyr + + + + + + - + + + + + + +
@@ -31,7 +48,7 @@ Tplyr - 0.1.0 + 0.4.4
@@ -39,10 +56,13 @@

+

When you look at a summary table within a clinical report, you can often break it down into some basic pieces. Consider this output.

+

+

Different variables are being summarized in chunks of the table, which we refer to as “layers”. Additionally, this table really only contains a few different types of summaries, which makes many of the calculations rather redundant. This drives the motivation behind ‘Tplyr’. The containing table is encapsulated within the tplyr_table() object, and each section, or “layer”, within the summary table can be broken down into a tplyr_layer() object.

+

-Installation

-

tplyr isn’t on CRAN yet. You can download it from GitHub for now.

-
devtools::install_github("atorus-research/tplyr")
+The tplyr_table() Object +

The tplyr_table() object is the conceptual “table” that contains all of the logic necessary to construct and display the data. ‘Tplyr’ tables are made up of one or more layers. Each layer contains an instruction for a summary to be performed. The tplyr_table() object contains those layers, and the general data, metadata, and logic necessary to prepare the data before any layers are constructed.

+

When a tplyr_table() is created, it will contain the following bindings:

+
    +
  • +target - The dataset upon which summaries will be performed
  • +
  • +count_layer_formats - Default formats to be used on count layers in the table
  • +
  • +shift_layer_formats - Default formats to be used on shift layers in the table
  • +
  • +desc_layer_formats - Default formats to be used on descriptive statistics layers in the table
  • +
  • +pop_data - The dataset containing population information. This defaults to the target dataset
  • +
  • +cols - A categorical variable in the target dataset to present summaries grouped by column (in addition to the treat_var variable)
  • +
  • +table_where - The where clause provided, used to subset the target dataset
  • +
  • +treat_var - Variable used to distinguish treatment groups in the target dataset.
  • +
  • +header_n - Default header N values based on treat_var and any cols variables
  • +
  • +pop_treat_var - Variable used to distinguish treatment groups in pop_data dataset (if different than the treat_var variable in the target dataset)
  • +
  • +layers - The container for individual layers of a tplyr_table() +
  • +
  • +treat_grps - Additional treatment groups to be added to the summary (i.e. Total)
  • +
+

The function tplyr_table() gives you a basic interface to instantiate the object. Modifier functions are available to change individual parameters to suit your analysis.

+
+t <- tplyr_table(adsl, TRT01P, where = SAFFL == "Y")
+t
+#> *** tplyr_table ***
+#> Target (data.frame):
+#>  Name:  adsl
+#>  Rows:  254
+#>  Columns:  49 
+#> treat_var variable (quosure)
+#>  TRT01P
+#> header_n:  header groups
+#> treat_grps groupings (list)
+#> Table Columns (cols):
+#> where: == SAFFL Y
+#> Number of layer(s): 0
+#> layer_output: 0
-
+

-Tables

-

The tplyr_table is the object that contains the data, layer envrionments, and other properties shared by the layer environments.

-
-

-Table Properties

+The tplyr_layer Object +

Users of ‘Tplyr’ interface with tplyr_layer() objects using the group_<type> family of functions. This family specifies the type of summary that is to be performed within a layer. count layers are used to create summary counts of some discrete variable. shift layers summarize the counts for different changes in states. Lastly, desc layers create descriptive statistics.

    -
  • target
  • -
  • treat_var
  • -
  • pop_data
  • -
  • pop_treat_var
  • -
  • header
  • -
  • header_n
  • -
  • layers
  • +
  • +Count Layers +
      +
    • Count layers allow you to easily create summaries based on counting distinct or non-distinct occurrences of values within a variable. Additionally, this layer allows you to create n (%) summaries where you’re also summarizing the proportion of instances a value occurs compared to some denominator. Count layers are also capable of producing counts of nested relationships. For example, if you want to produce counts of an overall outside group, and then the subgroup counts within that group, you can simply specify the target variable as vars(OutsideVariable, InsideVariable). This allows you to do tables like Adverse Events where you want to see the Preferred Terms within Body Systems, all in one layer. Count layers can also distinguish between distinct and non-distinct counts. Using some specified by variable, you can count the unique occurrences of some variable within the specified by grouping, including the target. This allows you to do a summary like unique subjects and their proportion experiencing some adverse event, and the number of total occurrences of that adverse event.
    • +
    +
  • +
  • +Descriptive Statistics Layers +
      +
    • Descriptive statistics layers perform summaries on continuous variables. There are a number of summaries built into ‘Tplyr’ already that you can perform, including n, mean, median, standard deviation, variance, min, max, interquartile range, Q1, Q3, and missing value counts. From these available summaries, the default presentation of a descriptive statistics layer will output ‘n’, ‘Mean (SD)’, ‘Median’, ‘Q1, Q3’, ‘Min, Max’, and ‘Missing’. You can change these summaries using set_format_strings(), and you can also add your own summaries using set_custom_summaries(). This allows you to easily implement any additional summary statistics you want presented.
    +
  • +
  • +Shift Layers +
      +
    • Shift layers are largely an abstraction of the count layer - and in fact, we re-use a lot of the same code to process these layers. In many shift tables, the “from” state is presented as rows in the table, and the “to” state is presented as columns. This clearly lays out how many subjects changed state between a baseline and some point in time. Shift layers give you an intuitive API to break these out, using a very similar interface as the other layers. There are also a number of modifier functions available to control nuanced aspects, such as how denominators should be applied.
    • +
    +
  • +
+
+cnt <- group_count(t, AGEGR1)
+cnt
+#> *** count_layer ***
+#> Self:  count_layer < 0x55d916df4ea0 >
+#> Parent:  tplyr_table < 0x55d911aa5ee0 >
+#> target_var: 
+#>  AGEGR1
+#> by: 
+#> where: TRUE
+#> Layer(s): 0
+
+dsc <- group_desc(t, AGE)
+dsc
+#> *** desc_layer ***
+#> Self:  desc_layer < 0x55d916f44218 >
+#> Parent:  tplyr_table < 0x55d911aa5ee0 >
+#> target_var: 
+#>  AGE
+#> by: 
+#> where: TRUE
+#> Layer(s): 0
+
+shf <- group_shift(t, vars(row=COMP8FL, column=COMP24FL))
+shf
+#> *** shift_layer ***
+#> Self:  shift_layer < 0x55d917062f50 >
+#> Parent:  tplyr_table < 0x55d911aa5ee0 >
+#> target_var: 
+#>  COMP8FL
+#>  COMP24FL
+#> by: 
+#> where: TRUE
+#> Layer(s): 0
+
+
+

+Adding Layers to a Table

+

Everyone has their own style of coding - so we’ve tried to be flexible to an extent. Overall, ‘Tplyr’ is built around tidy syntax, so all of our object construction supports piping with magrittr (i.e. %>%).

+

There are two ways to add layers to a tplyr_table(): add_layer() and add_layers(). The difference is that add_layer() allows you to construct the layer within the call to add_layer(), whereas with add_layers() you can attach multiple layers that have already been constructed upfront:

+
+t <- tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_count(AGEGR1, by = "Age categories n (%)")
+  )
+

Within add_layer(), the syntax to construct the count layer for Age Categories was written on the fly. add_layer() is special in that it also allows you to use piping to apply modifier functions to the layer being constructed:

+
+t <- tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_count(AGEGR1, by = "Age categories n (%)") %>% 
+      set_format_strings(f_str("xx (xx.x%)", n, pct)) %>% 
+      add_total_row()
+  )
+

add_layers(), on the other hand, lets you isolate the code to construct a particular layer if you wanted to separate things out more. Some might find this cleaner to work with if you have a large number of layers being constructed.

+
+t <- tplyr_table(adsl, TRT01P) 
+
+l1 <- group_count(t, AGEGR1, by = "Age categories n (%)")
+l2 <- group_desc(t, AGE, by = "Age (years)")
+
+t <- add_layers(t, l1, l2)
+

Notice that when you construct the layers separately, you need to specify the table to which they belong. add_layer() does this automatically. tplyr_table() and tplyr_layer() objects are built on environments, and the parent/child relationships are very important. This is why, even though the layer knows who its table parent is, the layers still need to be attached to the table (as the table doesn’t know who its children are). Advanced R does a very good job at explaining what environments in R are, their benefits, and how to use them.

+
+

+A Note Before We Go Deeper

+

Notice that when you construct a tplyr_table() or a tplyr_layer(), what displays is a summary of information about the table or layer? That’s because when you create these objects, ‘Tplyr’ constructs the metadata, but does not process the actual data. This allows you to construct your table and make sure the pieces fit together before you do the data processing - and it gives you a container to hold all of this metadata, and use it later if necessary.

+

To generate the data from a tplyr_table() object, you use the function build():

+
+t <- tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_count(AGEGR1, by = "Age categories n (%)")
+  )
+
+t %>% 
+  build() %>% 
+  kable()
row_label1            row_label2   var1_Placebo   var1_Xanomeline High Dose   var1_Xanomeline Low Dose   ord_layer_index   ord_layer_1   ord_layer_2
Age categories n (%)  <65          14 ( 16.3%)    11 ( 13.1%)                 8 ( 9.5%)                  1                 1             1
Age categories n (%)  >80          30 ( 34.9%)    18 ( 21.4%)                 29 ( 34.5%)                1                 1             2
Age categories n (%)  65-80        42 ( 48.8%)    55 ( 65.5%)                 47 ( 56.0%)                1                 1             3
+

But there’s more you can get from ‘Tplyr’. It’s great to have the formatted numbers, but what about the numeric data behind the scenes? Maybe a number looks suspicious and you need to investigate how you got that number. What if you want to calculate your own statistics based off of the counts? You can get that information as well using get_numeric_data(). This returns the numeric data from each layer as a list of data frames:

+
+get_numeric_data(t) %>% 
+  head() %>% 
+  kable()
TRT01P | “Age categories n (%)” | summary_var | n | distinct_n | distinct_total | total
Placebo | Age categories n (%) | <65 | 14 | 1 | 3 | 86
Placebo | Age categories n (%) | >80 | 30 | 1 | 3 | 86
Placebo | Age categories n (%) | 65-80 | 42 | 1 | 3 | 86
Xanomeline High Dose | Age categories n (%) | <65 | 11 | 1 | 3 | 84
Xanomeline High Dose | Age categories n (%) | >80 | 18 | 1 | 3 | 84
Xanomeline High Dose | Age categories n (%) | 65-80 | 55 | 1 | 3 | 84
Xanomeline Low Dose | Age categories n (%) | <65 | 8 | 1 | 3 | 84
Xanomeline Low Dose | Age categories n (%) | >80 | 29 | 1 | 3 | 84
Xanomeline Low Dose | Age categories n (%) | 65-80 | 47 | 1 | 3 | 84
+
+

By storing pertinent information, you can get more out of a ‘Tplyr’ object than processed data for display. And by specifying when you want to get data out of ‘Tplyr’, we can save you from repeatedly processing data while you’re constructing your outputs - which is particularly useful when that computation starts taking time.

-
+

-Layers

-

The tplyr_layers are the workhorse objects of the package and contain the logic and bindings that are performed on the target data. When the table is rendered layers are executed and combined to create the table.

-
+Constructing Layers +

The bulk of ‘Tplyr’ coding comes from constructing your layers and specifying the work you want to be done. Before we get into this, it’s important to discuss how ‘Tplyr’ handles string formatting.

+

-Layer Properties

+String Formatting in ‘Tplyr’ +

String formatting in ‘Tplyr’ is controlled by an object called an f_str(), which is also the name of the function you use to create these formats. To set these format strings into a tplyr_layer(), you use the function set_format_strings(), and this usage varies slightly between layer types (which is covered in other vignettes).

+

So - why is this object necessary? Consider this example:

+
+
+t <- tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_desc(AGE, by = "Age (years)") %>% 
+      set_format_strings(
+        'n' = f_str('xx', n),
+        'Mean (SD)' = f_str('xx.xx (xx.xxx)', mean, sd)
+      )
+  )
+
+t %>% 
+  build() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2
Age (years) | n | 86 | 84 | 84 | 1 | 1 | 1
Age (years) | Mean (SD) | 75.21 ( 8.590) | 74.38 ( 7.886) | 75.67 ( 8.286) | 1 | 1 | 2
+

In a perfect world, the f_str() calls wouldn’t be necessary - but in reality they allow us to infer a great deal of information from very few user inputs. In the calls that you see above:

- • parent
- • type
- • by
- • target_var
- • where
- • sort_vars
- • sort
  • The row labels in the row_label2 column are taken from the left side of each = in set_format_strings()
  • The string formats, including integer length, decimal precision, and the exact presentation formatting, are taken from the string in the first parameter of each f_str() call
  • The second and later parameters within each f_str() call determine the descriptive statistic summaries that will be performed. This is connected to a number of default summaries available within ‘Tplyr’, but you can also create your own summaries (covered in other vignettes). The built-in defaults include:
    • n = Number of observations
    • mean = Mean
    • sd = Standard Deviation
    • var = Variance
    • iqr = Interquartile Range
    • q1 = 1st quartile
    • q3 = 3rd quartile
    • min = Minimum value
    • max = Maximum value
    • missing = Count of NA values
  • When two summaries are placed in the same f_str() call, those two summaries are formatted into the same string. This allows you to do a “Mean (SD)” type format where both numbers appear.
+

This simple user input controls a significant amount of work in the back end of the data processing, and the f_str() object allows that metadata to be collected.
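The same f_str() interface can also be reused outside of a table. As a rough sketch (assuming the adsl data used throughout this vignette), apply_formats() applies an f_str()-style format within a dplyr::mutate() call:

library(dplyr)

# Sketch: format AGE as a right-padded string with one decimal place
adsl %>%
  mutate(age_fmt = apply_formats('xxx.x', AGE)) %>%
  select(USUBJID, AGE, age_fmt) %>%
  head()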

+

f_str() objects are also used with count layers to control the data presentation. Instead of specifying the summaries performed, you use n, pct, distinct_n, and distinct_pct as your parameters and specify how you would like the values displayed. Using distinct_n and distinct_pct should be combined with specifying a distinct-by variable using set_distinct_by() - a sketch combining these appears after the examples below.

+
+tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_count(AGEGR1, by = "Age categories") %>% 
+      set_format_strings(f_str('xx (xx.x)',n,pct))
+  ) %>% 
+  build() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2
Age categories | <65 | 14 (16.3) | 11 (13.1) | 8 ( 9.5) | 1 | 1 | 1
Age categories | >80 | 30 (34.9) | 18 (21.4) | 29 (34.5) | 1 | 1 | 2
Age categories | 65-80 | 42 (48.8) | 55 (65.5) | 47 (56.0) | 1 | 1 | 3
+
+
+tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_count(AGEGR1, by = "Age categories") %>% 
+      set_format_strings(f_str('xx',n))
+  ) %>% 
+  build() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2
Age categories | <65 | 14 | 11 | 8 | 1 | 1 | 1
Age categories | >80 | 30 | 18 | 29 | 1 | 1 | 2
Age categories | 65-80 | 42 | 55 | 47 | 1 | 1 | 3
+
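These pieces can also be mixed in a single format. As a hedged sketch (using the adae data introduced later in this vignette, and assuming USUBJID identifies a subject), distinct subject counts and raw event counts can sit side by side in one f_str():

# Sketch: distinct subject counts, distinct percentages, and event counts together
tplyr_table(adae, TRTA) %>% 
  add_layer(
    group_count(AEDECOD) %>% 
      set_distinct_by(USUBJID) %>% 
      set_format_strings(f_str('xx (xx.x%) [xx]', distinct_n, distinct_pct, n))
  ) %>% 
  build() %>% 
  head() %>% 
  kable()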

Really - format strings allow you to present your data however you like.

+
+tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_count(AGEGR1, by = "Age categories") %>% 
+      set_format_strings(f_str('xx (•◡•) xx.x%',n,pct))
+  ) %>% 
+  build() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2
Age categories | <65 | 14 (•◡•) 16.3% | 11 (•◡•) 13.1% | 8 (•◡•) 9.5% | 1 | 1 | 1
Age categories | >80 | 30 (•◡•) 34.9% | 18 (•◡•) 21.4% | 29 (•◡•) 34.5% | 1 | 1 | 2
Age categories | 65-80 | 42 (•◡•) 48.8% | 55 (•◡•) 65.5% | 47 (•◡•) 56.0% | 1 | 1 | 3
+

But should you? Probably not.

-
+

-What makes Tplyr different

-

There are several packages that also aim to make reporting for clinical studies easier. We found that none of these packages offered the extensibility or flexibility needed to tackle the entire suite of tables we intend to create.

-
+Layer Types +

-Capabilities

-

Tplyr allows a programmer to create new treatment groups from ones that already exist. This is useful when a table requires comparisons between individual groups and the population or between placebo groups and treated groups. Tplyr can create any table you would typically find in an ICH report in a regulatory submission. It is designed to close the gap between an analysis dataset and the creation of the tables.

-
- • Counts for categorical data
- • Statistics for numeric data
- • Shifts from baseline
- • P-values and meta information
+Descriptive Statistic Layers +

As covered under string formatting, set_format_strings() controls a great deal of what happens within a descriptive statistics layer. Note that there are some built-in defaults for what’s output:

+
+tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_desc(AGE, by = "Age (years)")
+  ) %>% 
+  build() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2
Age (years) | n | 86 | 84 | 84 | 1 | 1 | 1
Age (years) | Mean (SD) | 75.2 ( 8.59) | 74.4 ( 7.89) | 75.7 ( 8.29) | 1 | 1 | 2
Age (years) | Median | 76.0 | 76.0 | 77.5 | 1 | 1 | 3
Age (years) | Q1, Q3 | 69.2, 81.8 | 70.8, 80.0 | 71.0, 82.0 | 1 | 1 | 4
Age (years) | Min, Max | 52, 89 | 56, 88 | 51, 88 | 1 | 1 | 5
Age (years) | Missing | 0 | 0 | 0 | 1 | 1 | 6
+

To override these defaults, just specify the summaries that you want performed using set_format_strings() as described above. But what if ‘Tplyr’ doesn’t have a built-in function to do the summary statistic that you want to see? Well - you can make your own! This is where set_custom_summaries() comes into play. Let’s say you want to derive a geometric mean.

+
+tplyr_table(adsl, TRT01P) %>%
+  add_layer(
+    group_desc(AGE, by = "Age (years)") %>%
+      set_custom_summaries(
+        geometric_mean = exp(sum(log(.var[.var > 0]), na.rm=TRUE) / length(.var))
+      ) %>%
+      set_format_strings(
+        'Geometric Mean (SD)' = f_str('xx.xx (xx.xxx)', geometric_mean, sd)
+      )
+  ) %>% 
+  build() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2
Age (years) | Geometric Mean (SD) | 74.70 ( 8.590) | 73.94 ( 7.886) | 75.18 ( 8.286) | 1 | 1 | 1
+

In set_custom_summaries(), first you name the summary being performed. This is important - that name is what you use in the f_str() call to incorporate it into a format. Next, you program or call the desired function. What happens in the background is that this is used in a call to dplyr::summarize() - so use similar syntax. Use the variable name .var in your custom summary function. This is necessary because it allows a generic variable name to be used when multiple target variables are specified - and therefore the function can be applied to each target variable.

+
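To see why the generic .var name matters, here is a sketch applying one custom summary across two target variables at once (range_val is just an illustrative name, not a ‘Tplyr’ built-in):

# Sketch: the same custom summary evaluated against AGE and AVGDD via .var
tplyr_table(adsl, TRT01P) %>%
  add_layer(
    group_desc(vars(AGE, AVGDD), by = "Age and Avg. Daily Dose") %>%
      set_custom_summaries(
        range_val = max(.var, na.rm = TRUE) - min(.var, na.rm = TRUE)
      ) %>%
      set_format_strings(
        'Range' = f_str('xxx.x', range_val)
      )
  ) %>%
  build() %>%
  kable()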

Sometimes there’s a need to present multiple variables summarized side by side. ‘Tplyr’ allows you to do this as well.

+
+tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_desc(vars(AGE, AVGDD), by = "Age and Avg. Daily Dose")
+  ) %>% 
+  build() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | var2_Placebo | var2_Xanomeline High Dose | var2_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2
Age and Avg. Daily Dose | n | 86 | 84 | 84 | 86 | 84 | 84 | 1 | 1 | 1
Age and Avg. Daily Dose | Mean (SD) | 75.2 ( 8.59) | 74.4 ( 7.89) | 75.7 ( 8.29) | 0.0 ( 0.00) | 71.6 ( 8.11) | 54.0 ( 0.00) | 1 | 1 | 2
Age and Avg. Daily Dose | Median | 76.0 | 76.0 | 77.5 | 0.0 | 75.1 | 54.0 | 1 | 1 | 3
Age and Avg. Daily Dose | Q1, Q3 | 69.2, 81.8 | 70.8, 80.0 | 71.0, 82.0 | 0.0, 0.0 | 70.2, 76.9 | 54.0, 54.0 | 1 | 1 | 4
Age and Avg. Daily Dose | Min, Max | 52, 89 | 56, 88 | 51, 88 | 0, 0 | 54, 79 | 54, 54 | 1 | 1 | 5
Age and Avg. Daily Dose | Missing | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 6
+

‘Tplyr’ summarizes both variables and merges them together. This makes creating tables where you need to compare BASE, AVAL, and CHG next to each other nice and simple. Note the use of dplyr::vars() - in any situation where you’d like to use multiple variable names in a parameter, use dplyr::vars() to specify the variables. You can use text strings in the calls to dplyr::vars() as well.
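As a quick sketch of that last point, a text string inside dplyr::vars() simply becomes a constant row label alongside a real variable (SEX is assumed to be present in adsl, as in the CDISC pilot data):

# Sketch: a string and a variable mixed within dplyr::vars() for the by argument
tplyr_table(adsl, TRT01P) %>% 
  add_layer(
    group_count(AGEGR1, by = vars("Age categories", SEX))
  ) %>% 
  build() %>% 
  head() %>% 
  kable()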

-
+

-Quosures and Environments

-

Tplyr is built off of environments in R to ensure data are scoped properly and not needlessly duplicated. This also allows for nesting of layers for more complicated displays. Reporting tables are abstracted as ‘layers’ and R code is executed within these layers independently.

-

Similar to dplyr, it is possible to pass functions and unquoted variable names which are evaluated when the table is rendered. This allows the tables to be programmed before the data is available. Moving the programming of the tables forward in the report process can cut down the time from collection -> data programming -> analysis programming -> table programming.

+Count Layers +

Count layers generally allow you to create “n” and “n (%)” count type summaries. There are a few extra features here as well. Let’s say that you want a total row within your counts. This can be done with add_total_row():

+
+tplyr_table(adsl, TRT01P) %>% 
+  add_layer(
+    group_count(AGEGR1, by = "Age categories") %>% 
+      add_total_row()
+  ) %>% 
+  build() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2
Age categories | <65 | 14 ( 16.3%) | 11 ( 13.1%) | 8 ( 9.5%) | 1 | 1 | 1
Age categories | >80 | 30 ( 34.9%) | 18 ( 21.4%) | 29 ( 34.5%) | 1 | 1 | 2
Age categories | 65-80 | 42 ( 48.8%) | 55 ( 65.5%) | 47 ( 56.0%) | 1 | 1 | 3
Age categories | Total | 86 (100.0%) | 84 (100.0%) | 84 (100.0%) | 1 | 1 | 4
+
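If “Total” isn’t the label you want, the text for this row can be changed as well - a brief sketch using set_total_row_label():

# Sketch: renaming the total row label
tplyr_table(adsl, TRT01P) %>% 
  add_layer(
    group_count(AGEGR1, by = "Age categories") %>% 
      add_total_row() %>% 
      set_total_row_label("All ages")
  ) %>% 
  build() %>% 
  kable()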

Sometimes it’s also necessary to base count summaries on distinct values. ‘Tplyr’ allows you to do this as well with set_distinct_by():

+
+tplyr_table(adae, TRTA) %>% 
+  add_layer(
+    group_count('Subjects with at least one adverse event') %>% 
+      set_distinct_by(USUBJID) %>% 
+      set_format_strings(f_str('xx', n))
+  ) %>% 
+  build() %>% 
+  kable()
row_label1 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1
Subjects with at least one adverse event | 47 | 111 | 118 | 1 | NA
+

There’s another trick going on here - to create a summary with row label text like you see above, text strings can be used as the target variables. Here, we use this in combination with set_distinct_by() to count distinct subjects.

+

Adverse event tables often call for counting AEs of something like a body system and counting actual events within that body system. ‘Tplyr’ has means of making this simple for the user as well.

+
+tplyr_table(adae, TRTA) %>% 
+  add_layer(
+    group_count(vars(AEBODSYS, AEDECOD))
+  ) %>% 
+  build() %>% 
+  head() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo | var1_Xanomeline High Dose | var1_Xanomeline Low Dose | ord_layer_index | ord_layer_1 | ord_layer_2
SKIN AND SUBCUTANEOUS TISSUE DISORDERS | SKIN AND SUBCUTANEOUS TISSUE DISORDERS | 47 (100.0%) | 111 (100.0%) | 118 (100.0%) | 1 | 1 | Inf
SKIN AND SUBCUTANEOUS TISSUE DISORDERS | ACTINIC KERATOSIS | 0 ( 0.0%) | 1 ( 0.9%) | 0 ( 0.0%) | 1 | 1 | 1
SKIN AND SUBCUTANEOUS TISSUE DISORDERS | ALOPECIA | 1 ( 2.1%) | 0 ( 0.0%) | 0 ( 0.0%) | 1 | 1 | 2
SKIN AND SUBCUTANEOUS TISSUE DISORDERS | BLISTER | 0 ( 0.0%) | 2 ( 1.8%) | 8 ( 6.8%) | 1 | 1 | 3
SKIN AND SUBCUTANEOUS TISSUE DISORDERS | COLD SWEAT | 3 ( 6.4%) | 0 ( 0.0%) | 0 ( 0.0%) | 1 | 1 | 4
SKIN AND SUBCUTANEOUS TISSUE DISORDERS | DERMATITIS ATOPIC | 1 ( 2.1%) | 0 ( 0.0%) | 0 ( 0.0%) | 1 | 1 | 5
+

Here we again use dplyr::vars() to specify multiple target variables. When used in a count layer, ‘Tplyr’ automatically knows that the first variable is a grouping variable for the second, and counts are produced for both and then merged together.
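A related option worth knowing about here is set_nest_count() - a sketch below - which collapses the outer and inner labels into a single, indented row label column:

# Sketch: nesting the AEBODSYS/AEDECOD labels into one indented column
tplyr_table(adae, TRTA) %>% 
  add_layer(
    group_count(vars(AEBODSYS, AEDECOD)) %>% 
      set_nest_count(TRUE)
  ) %>% 
  build() %>% 
  head() %>% 
  kable()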

-
+

-Audit, Logging, and Debugging

-

Tplyr has functionality to produce the logs you would expect when running a SAS file. This allows a programmer to walk through the execution and see how the functions are affecting the data. It also provides a level of traceability for auditors reviewing the outputs. These logging functions can be toggled on and off with the following options in R. When output is sunk and files are sourced with echo, a reliable audit trail can be persisted and attached to an analysis.

-
- • <tplyr_debug>
- • <tidylog_switch>
- • <sink(file)>
-
options(tplyr_debug = TRUE)
-options(tidylog_switch = TRUE)
-sink("myoutput.txt")
+Shift Layers +

Lastly, let’s talk about shift layers. A common example of this would be looking at a subject’s lab levels at baseline versus some designated evaluation point. This would tell us, for example, how many subjects were high at baseline for a lab test vs. after an intervention has been introduced. The shift layer in ‘Tplyr’ is intended for creating shift tables that show these data as a matrix, where one state will be presented in rows and the other in columns. Let’s look at an example.

+
+# Tplyr can use factor orders to dummy values and order presentation
+adlb$ANRIND <- factor(adlb$ANRIND, c("L", "N", "H"))
+adlb$BNRIND <- factor(adlb$BNRIND, c("L", "N", "H"))
+
+tplyr_table(adlb, TRTA, where = PARAMCD == "CK") %>%
+  add_layer(
+    group_shift(vars(row=BNRIND, column=ANRIND), by=PARAM) %>% 
+      set_format_strings(f_str("xx (xxx%)", n, pct))
+  ) %>% 
+  build() %>% 
+  kable()
row_label1 | row_label2 | var1_Placebo_L | var1_Placebo_N | var1_Placebo_H | var1_Xanomeline High Dose_L | var1_Xanomeline High Dose_N | var1_Xanomeline High Dose_H | var1_Xanomeline Low Dose_L | var1_Xanomeline Low Dose_N | var1_Xanomeline Low Dose_H | ord_layer_index | ord_layer_1 | ord_layer_2
Creatine Kinase (U/L) | L | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 1 | 35 | 1
Creatine Kinase (U/L) | N | 0 ( 0%) | 27 ( 87%) | 4 ( 13%) | 0 ( 0%) | 17 ( 85%) | 2 ( 10%) | 0 ( 0%) | 14 ( 93%) | 1 ( 7%) | 1 | 35 | 2
Creatine Kinase (U/L) | H | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 1 ( 5%) | 0 ( 0%) | 0 ( 0%) | 0 ( 0%) | 1 | 35 | 3
+

The underlying process of shift tables is the same as count layers - we’re counting the number of occurrences of something by a set of grouping variables. The difference is that ‘Tplyr’ uses the group_shift() API to keep the same basic interface as other layer types while translating your target variables into the row variable and the column variable. Furthermore, there is some enhanced control over how denominators should behave that is necessary for a shift layer.
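For example, set_denoms_by() - sketched below - lets you pick which variables group the denominators; here the percentages would be computed within each combination of treatment group and parameter:

# Sketch: denominators grouped by treatment and parameter for a shift layer
tplyr_table(adlb, TRTA, where = PARAMCD == "CK") %>%
  add_layer(
    group_shift(vars(row = BNRIND, column = ANRIND), by = PARAM) %>% 
      set_format_strings(f_str("xx (xxx%)", n, pct)) %>% 
      set_denoms_by(TRTA, PARAM)
  ) %>% 
  build() %>% 
  kable()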

- +
+

+Where to go from here?

+

There’s quite a bit more to learn! And we’ve prepared a number of other vignettes to help you get what you need out of ‘Tplyr’.

+
+

+References

+

In building ‘Tplyr’, we needed resources beyond our personal experience to help guide design. PHUSE has done some great work to create guidance for standard outputs, in collaboration between multiple pharmaceutical companies and the FDA. You can find some of the resources that we referenced below.

+

Analysis and Displays Associated with Adverse Events

+

Analyses and Displays Associated with Demographics, Disposition, and Medications

+

Analyses and Displays Associated with Measures of Central Tendency

@@ -242,7 +1372,7 @@

-

Site built with pkgdown 1.5.1.

+

Site built with pkgdown 1.6.1.

diff --git a/docs/authors.html b/docs/authors.html index c91c5470..d3f0b778 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,230 +1,139 @@ - - - - - - - -Authors • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Authors and Citation • Tplyr - - - - - - - - - - -
-
-
+
+ +
+
-
- - -
- +
+

Authors

  • Eli Miller. Author.
  • Mike Stackhouse. Author, maintainer.
  • Ashley Tarasiewicz. Author.
  • Nathan Kosiba. Author.
  • Atorus Research LLC. Copyright holder.
+

Citation

+

Source: DESCRIPTION

+ +

Miller E, Stackhouse M, Tarasiewicz A, Kosiba N (2022). Tplyr: A Traceability Focused Grammar of Clinical Data Summary. R package version 1.0.0, https://github.com/atorus-research/Tplyr.

+
@Manual{,
+  title = {Tplyr: A Traceability Focused Grammar of Clinical Data Summary},
+  author = {Eli Miller and Mike Stackhouse and Ashley Tarasiewicz and Nathan Kosiba},
+  year = {2022},
+  note = {R package version 1.0.0},
+  url = {https://github.com/atorus-research/Tplyr},
+}
+
+
-
- +
+ - - - + diff --git a/docs/index.html b/docs/index.html index 3729daf4..624d1527 100644 --- a/docs/index.html +++ b/docs/index.html @@ -4,22 +4,22 @@ - -A Grammar of Clinical Data Summary • Tplyr + + +A Traceability Focused Grammar of Clinical Data Summary • Tplyr - - - + + + - - - - + + + @@ -35,135 +35,108 @@ gtag('config', 'UA-165685385-2'); - -
-
-
- -
- -

Welcome to Tplyr! Tplyr is a grammar of data format and summary. It’s designed to simplify the creation of clinical safety summaries and help you focus on how you present your data rather than redundant summaries being performed.

-

As always, we welcome your feedback. If you spot a bug, would like to see a new feature, or if any documentation is unclear - submit an issue through GitHub right here.

+

Welcome to Tplyr! Tplyr is a traceability minded grammar of data format and summary. It’s designed to simplify the creation of common clinical summaries and help you focus on how you present your data rather than redundant summaries being performed. Furthermore, for every result Tplyr produces, it also produces the metadata necessary to give your traceability from source to summary.

+

As always, we welcome your feedback. If you spot a bug, would like to see a new feature, or if any documentation is unclear - submit an issue through GitHub right here.

Take a look at the cheatsheet!

-
-

-Installation

+
+

Installation +

You can install Tplyr with:

-# Install from CRAN:
-install.packages("Tplyr")
-
-# Or install the development version:
-devtools::install_github("https://github.com/atorus-research/Tplyr.git")
+# Install from CRAN: +install.packages("Tplyr") + +# Or install the development version: +devtools::install_github("https://github.com/atorus-research/Tplyr.git", ref="devel")
-
-

-What is Tplyr?

-

dplyr from tidyverse is a grammar of data manipulation. So what does that allow you to do? It gives you, as a data analyst, the capability to easily and intuitively approach the problem of manipulating your data into an analysis ready form. dplyr conceptually breaks things down into verbs that allow you to focus on what you want to do more than how you have to do it.

-

Tplyr is designed around a similar concept, but its focus is on building summary tables within the clinical world. In the pharmaceutical industry, a great deal of the data presented in the outputs we create are very similar. For the most part, most of these tables can be broken down into a few categories:

+
+

What is Tplyr? +

+

dplyr from tidyverse is a grammar of data manipulation. So what does that allow you to do? It gives you, as a data analyst, the capability to easily and intuitively approach the problem of manipulating your data into an analysis ready form. dplyr conceptually breaks things down into verbs that allow you to focus on what you want to do more than how you have to do it.

+

Tplyr is designed around a similar concept, but its focus is on building summary tables common within the clinical world. In the pharmaceutical industry, a great deal of the data presented in the outputs we create are very similar. For the most part, most of these tables can be broken down into a few categories:

  • Counting for event based variables or categories
  • Shifting, which is just counting a change in state with a ‘from’ and a ‘to’
  • Generating descriptive statistics around some continuous variable.
-

For many of the tables that go into a clinical submission, at least when considering safety outputs, the tables are made up of a combination of these approaches. Consider a demographics table - and let’s use an example from the PHUSE project Standard Analyses & Code Sharing - Analyses & Displays Associated with Demographics, Disposition, and Medications in Phase 2-4 Clinical Trials and Integrated Summary Documents.

+

For many of the tables that go into a clinical submission, the tables are made up of a combination of these approaches. Consider a demographics table - and let’s use an example from the PHUSE project Standard Analyses & Code Sharing - Analyses & Displays Associated with Demographics, Disposition, and Medications in Phase 2-4 Clinical Trials and Integrated Summary Documents.

When you look at this table, you can begin breaking this output down into smaller, redundant, components. These components can be viewed as ‘layers’, and the table as a whole is constructed by stacking the layers. The boxes in the image above represent how you can begin to conceptualize this.

@@ -176,25 +149,26 @@

  • Weight - and we’re back to descriptive statistics.
  • So we have one table, with 6 summaries (7 including the next page, not shown) - but only 2 different approaches to summaries being performed. In the same way that dplyr is a grammar of data manipulation, Tplyr aims to be a grammar of data summary. The goal of Tplyr is to allow you to program a summary table like you see it on the page, by breaking a larger problem into smaller ‘layers’, and combining them together like you see on the page.

    -

    Enough talking - let’s see some code. In these examples, we will be using data from the PHUSE Test Data Factory based on the original pilot project submission package. Note: You can see our replication of the CDISC pilot using the PHUSE Test Data Factory data here.

    +

    Enough talking - let’s see some code. In these examples, we will be using data from the PHUSE Test Data Factory based on the original pilot project submission package. Note: You can see our replication of the CDISC pilot using the PHUSE Test Data Factory data here.

    -tplyr_table(adsl, TRT01P, where = SAFFL == "Y") %>% 
    -  add_layer(
    -    group_desc(AGE, by = "Age (years)")
    -  ) %>% 
    -  add_layer(
    -    group_count(AGEGR1, by = "Age Categories n (%)")
    -  ) %>% 
    -  build() %>% 
    -  kable()
    + +tplyr_table(adsl, TRT01P, where = SAFFL == "Y") %>% + add_layer( + group_desc(AGE, by = "Age (years)") + ) %>% + add_layer( + group_count(AGEGR1, by = "Age Categories n (%)") + ) %>% + build() %>% + kable()

    -+--++ @@ -301,15 +275,15 @@

    -
    -

    -‘Tplyr’ is Qualified

    -

    We understand how important documentation and testing is within the pharmaceutical world. This is why outside of unit testing ’Tplyr includes an entire user-acceptable testing document, where requirements were established, test-cases were written, and tests were independently programmed and executed. We do this in the hope that you can leverage our work within a qualified programming environment, and that we save you a substantial amount of trouble in getting it there.

    -

    You can find the qualification document within this repository right here. The ‘uat’ folder additionally contains all of the raw files, programmatic tests, specifications, and test cases necessary to create this report.

    +
    +

    ‘Tplyr’ is Qualified +

    +

We understand how important documentation and testing are within the pharmaceutical world. This is why, outside of unit testing, ‘Tplyr’ includes an entire user-acceptance testing document, where requirements were established, test cases were written, and tests were independently programmed and executed. We do this in the hope that you can leverage our work within a qualified programming environment, and that we save you a substantial amount of trouble in getting it there.

    +

    You can find the qualification document within this repository right here. The ‘uat’ folder additionally contains all of the raw files, programmatic tests, specifications, and test cases necessary to create this report.

    -
    -

    -The TL;DR

    +
    +

    The TL;DR +

    Here are some of the high level benefits of using Tplyr:

    • Easy construction of table data using an intuitive syntax
    • @@ -318,9 +292,9 @@

    -
    -

    -Where to go from here?

    +
    +

    Where to go from here? +

    There’s quite a bit more to learn! And we’ve prepared a number of other vignettes to help you get what you need out of ‘Tplyr’.

    • The best place to start is with our Getting Started vignette at vignette("Tplyr") @@ -344,82 +318,97 @@

    • And finally, learn more about producing and outputting styled tables using ‘Tplyr’ in vignette("styled-table")
    +

In Tplyr version 1.0.0, we’ve packed in a number of new features. For deeper dives on the largest new additions:

    +
    -
    -

    -References

    +
    +

    References +

In building ‘Tplyr’, we needed resources beyond our personal experience to help guide design. PHUSE has done some great work to create guidance for standard outputs, in collaboration between multiple pharmaceutical companies and the FDA. You can find some of the resources that we referenced below.

    -

    Analysis and Displays Associated with Adverse Events

    -

    Analyses and Displays Associated with Demographics, Disposition, and Medications

    -

    Analyses and Displays Associated with Measures of Central Tendency

    +

    Analysis and Displays Associated with Adverse Events

    +

    Analyses and Displays Associated with Demographics, Disposition, and Medications

    +

    Analyses and Displays Associated with Measures of Central Tendency

    -
    - - + diff --git a/docs/news/index.html b/docs/news/index.html index 46b32ced..1d4bdb8c 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -1,281 +1,157 @@ - - - - - - - -Changelog • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Changelog • Tplyr - - - - - - - - - - -
    -
    -
    +
    + +
    +
    -
    - +
    -
    - +
    + - - - + diff --git a/docs/pkgdown.css b/docs/pkgdown.css old mode 100755 new mode 100644 diff --git a/docs/pkgdown.js b/docs/pkgdown.js index 7e7048fa..5fccd9c0 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -2,70 +2,30 @@ (function($) { $(function() { - $('.navbar-fixed-top').headroom(); + $('nav.navbar').headroom(); - $('body').css('padding-top', $('.navbar').height() + 10); - $(window).resize(function(){ - $('body').css('padding-top', $('.navbar').height() + 10); + Toc.init({ + $nav: $("#toc"), + $scope: $("main h2, main h3, main h4, main h5, main h6") }); - $('[data-toggle="tooltip"]').tooltip(); - - var cur_path = paths(location.pathname); - var links = $("#navbar ul li a"); - var max_length = -1; - var pos = -1; - for (var i = 0; i < links.length; i++) { - if (links[i].getAttribute("href") === "#") - continue; - // Ignore external links - if (links[i].host !== location.host) - continue; - - var nav_path = paths(links[i].pathname); - - var length = prefix_length(nav_path, cur_path); - if (length > max_length) { - max_length = length; - pos = i; - } - } - - // Add class to parent
  • , and enclosing
  • if in dropdown - if (pos >= 0) { - var menu_anchor = $(links[pos]); - menu_anchor.parent().addClass("active"); - menu_anchor.closest("li.dropdown").addClass("active"); - } - }); - - function paths(pathname) { - var pieces = pathname.split("/"); - pieces.shift(); // always starts with / - - var end = pieces[pieces.length - 1]; - if (end === "index.html" || end === "") - pieces.pop(); - return(pieces); - } - - // Returns -1 if not found - function prefix_length(needle, haystack) { - if (needle.length > haystack.length) - return(-1); - - // Special case for length-0 haystack, since for loop won't run - if (haystack.length === 0) { - return(needle.length === 0 ? 0 : -1); + if ($('#toc').length) { + $('body').scrollspy({ + target: '#toc', + offset: $("nav.navbar").outerHeight() + 1 + }); } - for (var i = 0; i < haystack.length; i++) { - if (needle[i] != haystack[i]) - return(i); - } + // Activate popovers + $('[data-bs-toggle="popover"]').popover({ + container: 'body', + html: true, + trigger: 'focus', + placement: "top", + sanitize: false, + }); - return(haystack.length); - } + $('[data-bs-toggle="tooltip"]').tooltip(); /* Clipboard --------------------------*/ @@ -78,9 +38,9 @@ if(ClipboardJS.isSupported()) { $(document).ready(function() { - var copyButton = ""; + var copyButton = ""; - $(".examples, div.sourceCode").addClass("hasCopyButton"); + $("div.sourceCode").addClass("hasCopyButton"); // Insert copy buttons: $(copyButton).prependTo(".hasCopyButton"); @@ -89,20 +49,108 @@ $('.btn-copy-ex').tooltip({container: 'body'}); // Initialize clipboard: - var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { + var clipboard = new ClipboardJS('[data-clipboard-copy]', { text: function(trigger) { - return trigger.parentNode.textContent; + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); } }); - clipboardBtnCopies.on('success', function(e) { + clipboard.on('success', function(e) { changeTooltipMessage(e.trigger, 'Copied!'); e.clearSelection(); }); - clipboardBtnCopies.on('error', function() { + clipboard.on('error', function() { changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); }); + }); } + + /* Search marking --------------------------*/ + var url = new URL(window.location.href); + var toMark = url.searchParams.get("q"); + var mark = new Mark("main#main"); + if (toMark) { + mark.mark(toMark, { + accuracy: { + value: "complementary", + limiters: [",", ".", ":", "/"], + } + }); + } + + /* Search --------------------------*/ + /* Adapted from https://github.com/rstudio/bookdown/blob/2d692ba4b61f1e466c92e78fd712b0ab08c11d31/inst/resources/bs4_book/bs4_book.js#L25 */ + // Initialise search index on focus + var fuse; + $("#search-input").focus(async function(e) { + if (fuse) { + return; + } + + $(e.target).addClass("loading"); + var response = await fetch($("#search-input").data("search-index")); + var data = await response.json(); + + var options = { + keys: ["what", "text", "code"], + ignoreLocation: true, + threshold: 0.1, + includeMatches: true, + includeScore: true, + }; + fuse = new Fuse(data, options); + + $(e.target).removeClass("loading"); + }); + + // Use algolia autocomplete + var options = { + autoselect: true, + debug: true, + hint: false, + minLength: 2, + }; + var q; +async function searchFuse(query, callback) { + await fuse; + + var items; + if (!fuse) { + items = []; + } else { + q = query; + var results = fuse.search(query, { limit: 20 }); + items = results + .filter((x) => x.score <= 0.75) + .map((x) => x.item); + if (items.length 
=== 0) { + items = [{dir:"Sorry 😿",previous_headings:"",title:"No results found.",what:"No results found.",path:window.location.href}]; + } + } + callback(items); +} + $("#search-input").autocomplete(options, [ + { + name: "content", + source: searchFuse, + templates: { + suggestion: (s) => { + if (s.title == s.what) { + return `${s.dir} >
    ${s.title}
    `; + } else if (s.previous_headings == "") { + return `${s.dir} >
    ${s.title}
    > ${s.what}`; + } else { + return `${s.dir} >
    ${s.title}
    > ${s.previous_headings} > ${s.what}`; + } + }, + }, + }, + ]).on('autocomplete:selected', function(event, s) { + window.location.href = s.path + "?q=" + q + "#" + s.id; + }); + }); })(window.jQuery || window.$) + + diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index f8fae4ff..ec575f6f 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,17 +1,22 @@ -pandoc: 2.11.4 -pkgdown: 1.6.1 +pandoc: '2.18' +pkgdown: 2.0.6 pkgdown_sha: ~ articles: Tplyr: Tplyr.html count: count.html + custom-metadata: custom-metadata.html denom: denom.html desc: desc.html + layer_templates: layer_templates.html + metadata: metadata.html options: options.html - readme: readme.html riskdiff: riskdiff.html shift: shift.html sort: sort.html styled-table: styled-table.html table: table.html -last_built: 2022-01-07T18:12Z +last_built: 2022-10-14T16:16Z +urls: + reference: https://atorus-research.github.io/Tplyr/reference + article: https://atorus-research.github.io/Tplyr/articles diff --git a/docs/reference/Tplyr.html b/docs/reference/Tplyr.html deleted file mode 100644 index d783a34e..00000000 --- a/docs/reference/Tplyr.html +++ /dev/null @@ -1,378 +0,0 @@ - - - - - - - - -A grammar of summary data for clinical reports — Tplyr • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    `r lifecycle::badge("experimental")`

    -
    - - - -

    Details

    - -

    'Tplyr' is a package dedicated to simplifying the data manipulation necessary -to create clinical reports. Clinical data summaries can often be broken down -into two factors - counting discrete variables (or counting shifts in state), -and descriptive statistics around a continuous variable. Many of the reports -that go into a clinical report are made up of these two scenarios. By -abstracting this process away, 'Tplyr' allows you to rapidly build these -tables without worrying about the underlying data manipulation.

    -

    'Tplyr' takes this process a few steps further by abstracting away most of -the programming that goes into proper presentation, which is where a great -deal of programming time is spent. For example, 'Tplyr' allows you to easily -control:

    -
    -
    String formatting

    Different reports warrant -different presentation of your strings. Programming this can get tedious, as -you typically want to make sure that your decimals properly align. 'Tplyr' -abstracts this process away and provides you with a simple interface to -specify how you want your data presented

    Treatment -groups

    Need a total column? Need to group summaries of multiple treatments? -'Tplyr' makes it simple to add additional treatment groups into your report

    -
    Denominators

    n (%) counts often vary based on the summary -being performed. 'Tplyr' allows you to easily control what denominators are -used based on a few common scenarios

    Sorting

    Summarizing -data is one thing, but ordering it for presentation. Tplyr automatically -derives sorting variable to give you the data you need to order your table -properly. This process is flexible so you can easily get what you want by -leveraging your data or characteristics of R.

    -
    - -

    Another powerful aspect of 'Tplyr' are the objects themselves. 'Tplyr' does -more than format your data. Metadata about your table is kept under the hood, -and functions allow you to access information that you need. For example, -'Tplyr' allows you to calculate and access the raw numeric data of -calculations as well, and easily pick out just the pieces of information that -you need.

    -

    Lastly, 'Tplyr' was built to be flexible, yet intuitive. A common pitfall of -building tools like this is over automation. By doing to much, you end up not -doing enough. 'Tplyr' aims to hit the sweet spot in between. Additionally, we -designed our function interfaces to be clean. Modifier functions offer you -flexibility when you need it, but defaults can be set to keep the code -concise. This allows you to quickly assemble your table, and easily make -changes where necessary.

    -

    See also

    - - -

    Author

    - -

    Maintainer: Mike Stackhouse mike.stackhouse@atorusresearch.com (ORCID)

    -

    Authors:

    - -

    Other contributors:

      -
    • Atorus Research LLC [copyright holder]

    • -
    - - - -

    Examples

    -
    # Load in pipe -library(magrittr) - -# Use just the defaults -tplyr_table(mtcars, gear) %>% - add_layer( - group_desc(mpg, by=cyl) - ) %>% - add_layer( - group_count(carb, by=cyl) - ) %>% - build() -
    #> # A tibble: 36 × 8 -#> row_label1 row_label2 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1 -#> <chr> <chr> <chr> <chr> <chr> <int> <dbl> -#> 1 4 n " 1" " 8" " 2" 1 1 -#> 2 4 Mean (SD) "21.50 (… "26.92 … "28.20 … 1 1 -#> 3 4 Median "21.50" "25.85" "28.20" 1 1 -#> 4 4 Q1, Q3 "21.50, … "22.80,… "27.10,… 1 1 -#> 5 4 Min, Max "21.5, 2… "21.4, … "26.0, … 1 1 -#> 6 4 Missing " 0" " 0" " 0" 1 1 -#> 7 6 n " 2" " 4" " 1" 1 2 -#> 8 6 Mean (SD) "19.75 (… "19.75 … "19.70 … 1 2 -#> 9 6 Median "19.75" "20.10" "19.70" 1 2 -#> 10 6 Q1, Q3 "18.92, … "18.85,… "19.70,… 1 2 -#> # … with 26 more rows, and 1 more variable: ord_layer_2 <dbl>
    -# Customize and modify -tplyr_table(mtcars, gear) %>% - add_layer( - group_desc(mpg, by=cyl) %>% - set_format_strings( - "n" = f_str("xx", n), - "Mean (SD)" = f_str("a.a+1 (a.a+2)", mean, sd, empty='NA'), - "Median" = f_str("a.a+1", median), - "Q1, Q3" = f_str("a, a", q1, q3, empty=c(.overall='NA')), - "Min, Max" = f_str("a, a", min, max), - "Missing" = f_str("xx", missing) - ) - ) %>% - add_layer( - group_count(carb, by=cyl) %>% - add_risk_diff( - c('5', '3'), - c('4', '3') - ) %>% - set_format_strings( - n_counts = f_str('xx (xx%)', n, pct), - riskdiff = f_str('xx.xxx (xx.xxx, xx.xxx)', dif, low, high) - ) %>% - set_order_count_method("bycount") %>% - set_ordering_cols('4') %>% - set_result_order_var(pct) - ) %>% - build() -
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> # A tibble: 36 × 10 -#> row_label1 row_label2 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1 -#> <chr> <chr> <chr> <chr> <chr> <int> <dbl> -#> 1 4 n " 1" " 8" " 2" 1 1 -#> 2 4 Mean (SD) "21.50 (… "26.92 … "28.20 … 1 1 -#> 3 4 Median "21.50" "25.85" "28.20" 1 1 -#> 4 4 Q1, Q3 "22, 22" "23, 31" "27, 29" 1 1 -#> 5 4 Min, Max "22, 22" "21, 34" "26, 30" 1 1 -#> 6 4 Missing " 0" " 0" " 0" 1 1 -#> 7 6 n " 2" " 4" " 1" 1 2 -#> 8 6 Mean (SD) "19.75 (… "19.75 … "19.70 … 1 2 -#> 9 6 Median "19.75" "20.10" "19.70" 1 2 -#> 10 6 Q1, Q3 "19, 21" "19, 21" "20, 20" 1 2 -#> # … with 26 more rows, and 3 more variables: ord_layer_2 <dbl>, -#> # rdiff_5_3 <chr>, rdiff_4_3 <chr>
    -# A Shift Table -tplyr_table(mtcars, am) %>% - add_layer( - group_shift(vars(row=gear, column=carb), by=cyl) %>% - set_format_strings(f_str("xxx (xx.xx%)", n, pct)) - ) %>% - build() -
    #> # A tibble: 9 × 17 -#> row_label1 row_label2 var1_0_1 var1_0_2 var1_0_3 var1_0_4 var1_0_6 var1_0_8 -#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> -#> 1 4 3 " 1 (33… " 0 ( 0… " 0 ( 0… " 0 ( … " 0 ( … " 0 ( … -#> 2 4 4 " 0 ( 0… " 2 (66… " 0 ( 0… " 0 ( … " 0 ( … " 0 ( … -#> 3 4 5 " 0 ( 0… " 0 ( 0… " 0 ( 0… " 0 ( … " 0 ( … " 0 ( … -#> 4 6 3 " 2 (50… " 0 ( 0… " 0 ( 0… " 0 ( … " 0 ( … " 0 ( … -#> 5 6 4 " 0 ( 0… " 0 ( 0… " 0 ( 0… " 2 (5… " 0 ( … " 0 ( … -#> 6 6 5 " 0 ( 0… " 0 ( 0… " 0 ( 0… " 0 ( … " 0 ( … " 0 ( … -#> 7 8 3 " 0 ( 0… " 4 (33… " 3 (25… " 5 (4… " 0 ( … " 0 ( … -#> 8 8 4 " 0 ( 0… " 0 ( 0… " 0 ( 0… " 0 ( … " 0 ( … " 0 ( … -#> 9 8 5 " 0 ( 0… " 0 ( 0… " 0 ( 0… " 0 ( … " 0 ( … " 0 ( … -#> # … with 9 more variables: var1_1_1 <chr>, var1_1_2 <chr>, var1_1_3 <chr>, -#> # var1_1_4 <chr>, var1_1_6 <chr>, var1_1_8 <chr>, ord_layer_index <int>, -#> # ord_layer_1 <dbl>, ord_layer_2 <dbl>
    -
    -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/add_column_headers.html b/docs/reference/add_column_headers.html index 6256c3bf..f06500d2 100644 --- a/docs/reference/add_column_headers.html +++ b/docs/reference/add_column_headers.html @@ -1,223 +1,122 @@ - - - - - - - -Attach column headers to a Tplyr output — add_column_headers • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Attach column headers to a Tplyr output — add_column_headers • Tplyr - - - - - + + Skip to contents + +
    -
    - -
    - +
    + - - - + diff --git a/docs/reference/add_risk_diff.html b/docs/reference/add_risk_diff.html index 16e102e6..eb24c2a7 100644 --- a/docs/reference/add_risk_diff.html +++ b/docs/reference/add_risk_diff.html @@ -1,251 +1,156 @@ - - - - - - - -Add risk difference to a count layer — add_risk_diff • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    -
    +
    + +
    +
    -
    - +
    +

    Examples

    +
    library(magrittr)
    +
    +## Two group comparisons with default options applied
    +t <- tplyr_table(mtcars, gear)
    +
    +# Basic risk diff for two groups, using defaults
    +l1 <- group_count(t, carb) %>%
    +  # Compare 3 vs. 4, 3 vs. 5
    +  add_risk_diff(
    +    c('3', '4'),
    +    c('3', '5')
    +  )
    +
    +# Build and show output
    +add_layers(t, l1) %>% build()
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> # A tibble: 6 × 8
    +#>   row_label1 var1_3     var1_4     var1_5    ord_layer_index rdiff_3_4 rdiff_3_5
    +#>   <chr>      <chr>      <chr>      <chr>               <int> <chr>     <chr>    
    +#> 1 1          3 ( 20.0%) 4 ( 33.3%) 0 (  0.0…               1 "-0.133 … " 0.200 …
    +#> 2 2          4 ( 26.7%) 4 ( 33.3%) 2 ( 40.0…               1 "-0.067 … "-0.133 …
    +#> 3 3          3 ( 20.0%) 0 (  0.0%) 0 (  0.0…               1 " 0.200 … " 0.200 …
    +#> 4 4          5 ( 33.3%) 4 ( 33.3%) 1 ( 20.0…               1 " 0.000 … " 0.133 …
    +#> 5 6          0 (  0.0%) 0 (  0.0%) 1 ( 20.0…               1 " 0.000 … "-0.200 …
    +#> 6 8          0 (  0.0%) 0 (  0.0%) 1 ( 20.0…               1 " 0.000 … "-0.200 …
    +#> # … with 1 more variable: ord_layer_1 <dbl>
    +
    +## Specify custom formats and display variables
    +t <- tplyr_table(mtcars, gear)
    +
    +# Create the layer with custom formatting
    +l2 <- group_count(t, carb) %>%
    +  # Compare 3 vs. 4, 3 vs. 5
    +  add_risk_diff(
    +    c('3', '4'),
    +    c('3', '5')
    +  ) %>%
    +  set_format_strings(
    +    'n_counts' = f_str('xx (xx.x)', n, pct),
    +    'riskdiff' = f_str('xx.xxx, xx.xxx, xx.xxx, xx.xxx, xx.xxx', comp, ref, dif, low, high)
    +  )
    +
    +# Build and show output
    +add_layers(t, l2) %>% build()
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> # A tibble: 6 × 8
    +#>   row_label1 var1_3      var1_4      var1_5  ord_layer_index rdiff_3_4 rdiff_3_5
    +#>   <chr>      <chr>       <chr>       <chr>             <int> <chr>     <chr>    
    +#> 1 1          " 3 (20.0)" " 4 (33.3)" " 0 ( …               1 " 0.200,… " 0.200,…
    +#> 2 2          " 4 (26.7)" " 4 (33.3)" " 2 (4…               1 " 0.267,… " 0.267,…
    +#> 3 3          " 3 (20.0)" " 0 ( 0.0)" " 0 ( …               1 " 0.200,… " 0.200,…
    +#> 4 4          " 5 (33.3)" " 4 (33.3)" " 1 (2…               1 " 0.333,… " 0.333,…
    +#> 5 6          " 0 ( 0.0)" " 0 ( 0.0)" " 1 (2…               1 " 0.000,… " 0.000,…
    +#> 6 8          " 0 ( 0.0)" " 0 ( 0.0)" " 1 (2…               1 " 0.000,… " 0.000,…
    +#> # … with 1 more variable: ord_layer_1 <dbl>
    +
    +## Passing arguments to prop.test
    +t <- tplyr_table(mtcars, gear)
    +
    +# Create the layer with args option
    +l3 <- group_count(t, carb) %>%
    +  # Compare 3 vs. 4, 4 vs. 5
    +  add_risk_diff(
    +    c('3', '4'),
    +    c('3', '5'),
    +    args = list(conf.level = 0.9, correct=FALSE, alternative='less')
    +  )
    +
    +# Build and show output
    +add_layers(t, l3) %>% build()
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> # A tibble: 6 × 8
    +#>   row_label1 var1_3     var1_4     var1_5    ord_layer_index rdiff_3_4 rdiff_3_5
    +#>   <chr>      <chr>      <chr>      <chr>               <int> <chr>     <chr>    
    +#> 1 1          3 ( 20.0%) 4 ( 33.3%) 0 (  0.0…               1 "-0.133 … " 0.200 …
    +#> 2 2          4 ( 26.7%) 4 ( 33.3%) 2 ( 40.0…               1 "-0.067 … "-0.133 …
    +#> 3 3          3 ( 20.0%) 0 (  0.0%) 0 (  0.0…               1 " 0.200 … " 0.200 …
    +#> 4 4          5 ( 33.3%) 4 ( 33.3%) 1 ( 20.0…               1 " 0.000 … " 0.133 …
    +#> 5 6          0 (  0.0%) 0 (  0.0%) 1 ( 20.0…               1 " 0.000 … "-0.200 …
    +#> 6 8          0 (  0.0%) 0 (  0.0%) 1 ( 20.0…               1 " 0.000 … "-0.200 …
    +#> # … with 1 more variable: ord_layer_1 <dbl>
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/add_total_row.html b/docs/reference/add_total_row.html index 53b93b96..998533b9 100644 --- a/docs/reference/add_total_row.html +++ b/docs/reference/add_total_row.html @@ -1,277 +1,171 @@ - - - - - - - -Add a Total row into a count summary. — add_total_row • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Skip to contents + +
    -
    - +
    +

    Examples

    +
    # Load in Pipe
    +library(magrittr)
    +
    +tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      add_total_row(f_str("xxxx", n))
    +   ) %>%
    +   build()
    +#> # A tibble: 4 × 6
    +#>   row_label1 var1_3        var1_4        var1_5      ord_layer_index ord_layer_1
    +#>   <chr>      <chr>         <chr>         <chr>                 <int>       <dbl>
    +#> 1 4          " 1 (  6.7%)" " 8 ( 66.7%)" " 2 ( 40.0…               1           1
    +#> 2 6          " 2 ( 13.3%)" " 4 ( 33.3%)" " 1 ( 20.0…               1           2
    +#> 3 8          "12 ( 80.0%)" " 0 (  0.0%)" " 2 ( 40.0…               1           3
    +#> 4 Total      "  15"        "  12"        "   5"                    1           4
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/append_metadata.html b/docs/reference/append_metadata.html new file mode 100644 index 00000000..869b927e --- /dev/null +++ b/docs/reference/append_metadata.html @@ -0,0 +1,193 @@ + +Append the Tplyr table metadata dataframe — append_metadata • Tplyr + Skip to contents + + +
    +
    +
    + +
    +

append_metadata() allows a user to extend the Tplyr metadata data frame with user-provided data. In some tables, Tplyr may be able to provide most of the data, but a user may have to extend the table with other summaries, statistics, etc. This function allows the user to extend the tplyr_table's metadata with their own content, using custom data frames created with the tplyr_meta object.

    +
    + +
    +

    Usage

    +
    append_metadata(t, meta)
    +
    + +
    +

    Arguments

    +
    t
    +

    A tplyr_table object

    + + +
    meta
    +

    A dataframe fitting the specifications of the details section of +this function

    + +
    +
    +

    Value

    + + +

    A tplyr_table object

    +
    +
    +

    Details

    +

As this is an advanced feature of Tplyr, the onus is on the user to make sure the metadata data frame is assembled properly. The only restrictions applied by append_metadata() are that meta must have a column named row_id, and the values in row_id cannot duplicate any row_id value already present in the Tplyr metadata data frame. tplyr_meta() objects align with the constructed data frame using the row_id and output dataset column name. As such, tplyr_meta() objects should be inserted into the data frame using a list column.

    +
    + +
    +

    Examples

    +
    t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_desc(wt)
    +  )
    +
    +t %>%
    +  build(metadata=TRUE)
    +#> # A tibble: 6 × 7
    +#>   row_id row_label1 var1_3             var1_4 var1_5 ord_layer_index ord_layer_1
    +#>   <chr>  <chr>      <chr>              <chr>  <chr>            <int>       <int>
    +#> 1 d1_1   n          " 15"              " 12"  "  5"                1           1
    +#> 2 d2_1   Mean (SD)  "3.8926 (0.83299)" "2.61… "2.63…               1           2
    +#> 3 d3_1   Median     "3.7300"           "2.70… "2.77…               1           3
    +#> 4 d4_1   Q1, Q3     "3.4500, 3.9575"   "2.13… "2.14…               1           4
    +#> 5 d5_1   Min, Max   "2.465, 5.424"     "1.61… "1.51…               1           5
    +#> 6 d6_1   Missing    "  0"              "  0"  "  0"                1           6
    +
    +m <- tibble::tibble(
    +  row_id = c('x1_1'),
    +  var1_3 = list(tplyr_meta(rlang::quos(a, b, c), rlang::quos(a==1, b==2, c==3)))
    +)
    +
    +append_metadata(t, m)
    +#> *** tplyr_table ***
    +#> Target (data.frame):
    +#> 	Name:  mtcars
    +#> 	Rows:  32
    +#> 	Columns:  11 
    +#> treat_var variable (quosure)
    +#> 	gear
    +#> header_n: 3 header groups
    +#> treat_grps groupings (list)
    +#> Table Columns (cols):
    +#> where: TRUE
    +#> Number of layer(s): 1
    +#> layer_output: 0
    +
    +
    +
    + + +
    + + + +
    + + + + + + + diff --git a/docs/reference/apply_formats.html b/docs/reference/apply_formats.html new file mode 100644 index 00000000..4ce039ff --- /dev/null +++ b/docs/reference/apply_formats.html @@ -0,0 +1,175 @@ + +Apply Format Strings outside of a Tplyr table — apply_formats • Tplyr + Skip to contents + + +
    +
    +
    + +
    +

The f_str object in Tplyr is used to drive formatting of the output strings within a Tplyr table. This function allows a user to use the same interface to apply formatted strings to any data frame within a dplyr::mutate() context.

    +
    + +
    +

    Usage

    +
    apply_formats(format_string, ..., empty = c(.overall = ""))
    +
    + +
    +

    Arguments

    +
    format_string
    +

    The desired display format. X's indicate digits. On the +left, the number of x's indicates the integer length. On the right, the +number of x's controls decimal precision and rounding. Variables are +inferred by any separation of the 'x' values other than a decimal.

    + + +
    ...
    +

    The variables to be formatted using the format specified in +format_string. These must be numeric variables.

    + + +
    empty
    +

    The string to display when the numeric data is not available. +Use a single element character vector, with the element named '.overall' to +instead replace the whole string.

Value

Character vector of formatted values

Details

Note that auto-precision is not currently supported within apply_formats()

Examples
    
    +library(dplyr)
    +
    +mtcars %>%
    +  head() %>%
    +  mutate(
    +    fmt_example = apply_formats('xxx (xx.x)', hp, wt)
    +  )
    +#>                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
    +#> Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
    +#> Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
    +#> Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
    +#> Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
    +#> Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
    +#> Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
    +#>                   fmt_example
    +#> Mazda RX4          110 ( 2.6)
    +#> Mazda RX4 Wag      110 ( 2.9)
    +#> Datsun 710          93 ( 2.3)
    +#> Hornet 4 Drive     110 ( 3.2)
    +#> Hornet Sportabout  175 ( 3.4)
    +#> Valiant            105 ( 3.5)
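+
+# The empty argument described above can also be supplied here. A hedged,
+# hypothetical sketch (the input values and the 'NE' placeholder are made up;
+# with the '.overall' element, rows whose inputs are missing would display the
+# replacement string rather than a partially formatted value):
+data.frame(hp = c(110, NA), wt = c(2.62, NA)) %>%
+  mutate(
+    fmt_example = apply_formats('xxx (xx.x)', hp, wt, empty = c(.overall = 'NE'))
+  )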
diff --git a/docs/reference/apply_row_masks.html b/docs/reference/apply_row_masks.html index 27790dbf..d17dfe52 100644 --- a/docs/reference/apply_row_masks.html +++ b/docs/reference/apply_row_masks.html @@ -1,196 +1,98 @@

Replace repeating row label variables with blanks in preparation for display. — apply_row_masks • Tplyr

diff --git a/docs/reference/build.html b/docs/reference/build.html index edd71bca..db155231 100644 --- a/docs/reference/build.html +++ b/docs/reference/build.html @@ -1,278 +1,196 @@

Trigger the execution of the tplyr_table — build • Tplyr

    Details

When the build command is executed, all of the data processing commences. Any pre-processing necessary within the table environment takes place first. Next, each of the layers begins executing. Once the layers complete executing, the output of each layer is stacked into the resulting data frame.

Once this process is complete, any post-processing necessary within the table environment takes place, and the final output can be delivered. Metadata and traceability information are kept within each of the layer environments, which allows an investigation into the source of the resulting datapoints. For example, numeric data from any summaries performed is maintained and accessible within a layer using get_numeric_data.

The `metadata` option of build will trigger the construction of traceability metadata for the constructed data frame. Essentially, for every "result" that Tplyr produces, Tplyr can also generate the steps necessary to obtain the source data which produced that result from the input. For more information, see vignette("metadata").
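As a minimal sketch of the `metadata` option (the row_id 'd1_1' and column 'var1_setosa' below are assumed values for illustration; output not shown):

library(magrittr)
t <- tplyr_table(iris, Species) %>%
  add_layer(
    group_desc(Sepal.Length)
  )
# Build while retaining traceability metadata, then look up one result cell
dat <- build(t, metadata = TRUE)
get_meta_result(t, 'd1_1', 'var1_setosa')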

    See also

tplyr_table, tplyr_layer, add_layer, add_layers, layer_constructors

Examples
    # Load in Pipe
    +library(magrittr)
    +
    +tplyr_table(iris, Species) %>%
    +  add_layer(
    +    group_desc(Sepal.Length, by = "Sepal Length")
    +  ) %>%
    +  add_layer(
    +    group_desc(Sepal.Width, by = "Sepal Width")
    +  ) %>%
    +  build()
    +#> # A tibble: 12 × 8
    +#>    row_label1   row_label2 var1_setosa    var1_versicolor var1_virginica
    +#>    <chr>        <chr>      <chr>          <chr>           <chr>         
    +#>  1 Sepal Length n          " 50"          " 50"           " 50"         
    +#>  2 Sepal Length Mean (SD)  "5.01 (0.352)" "5.94 (0.516)"  "6.59 (0.636)"
    +#>  3 Sepal Length Median     "5.00"         "5.90"          "6.50"        
    +#>  4 Sepal Length Q1, Q3     "4.80, 5.20"   "5.60, 6.30"    "6.23, 6.90"  
    +#>  5 Sepal Length Min, Max   "4.3, 5.8"     "4.9, 7.0"      "4.9, 7.9"    
    +#>  6 Sepal Length Missing    "  0"          "  0"           "  0"         
    +#>  7 Sepal Width  n          " 50"          " 50"           " 50"         
    +#>  8 Sepal Width  Mean (SD)  "3.43 (0.379)" "2.77 (0.314)"  "2.97 (0.322)"
    +#>  9 Sepal Width  Median     "3.40"         "2.80"          "3.00"        
    +#> 10 Sepal Width  Q1, Q3     "3.20, 3.68"   "2.52, 3.00"    "2.80, 3.18"  
    +#> 11 Sepal Width  Min, Max   "2.3, 4.4"     "2.0, 3.4"      "2.2, 3.8"    
    +#> 12 Sepal Width  Missing    "  0"          "  0"           "  0"         
    +#> # … with 3 more variables: ord_layer_index <int>, ord_layer_1 <int>,
    +#> #   ord_layer_2 <int>
diff --git a/docs/reference/by.html b/docs/reference/by.html index 80767a7d..cfee31b7 100644 --- a/docs/reference/by.html +++ b/docs/reference/by.html @@ -1,248 +1,141 @@

Set or return by layer binding — get_by • Tplyr

    Usage

get_by(layer)

set_by(layer, by)

Arguments

layer

A tplyr_layer object

by

A string, a variable name, or a list of variable names supplied using dplyr::vars.

Value

For get_by, the by binding of the supplied layer. For set_by, the modified layer environment.

Examples
    # Load in pipe
    +library(magrittr)
    +iris$Species2 <- iris$Species
    +lay <- tplyr_table(iris, Species) %>%
    +  group_count(Species) %>%
    +  set_by(vars(Species2, Sepal.Width))
diff --git a/docs/reference/f_str.html b/docs/reference/f_str.html index fcd4a26a..4aa08b59 100644 --- a/docs/reference/f_str.html +++ b/docs/reference/f_str.html @@ -1,354 +1,287 @@

Create a f_str object — f_str • Tplyr
For example, if you're presenting a value like "mean (sd)" - you could provide the string 'xx.xx (xx.xxx)', or perhaps 'a.a+1 (a.a+2)'. Note that you're able to provide different integer lengths and different decimal precision for the two values. Each format string is independent and relates only to the format specified.

The other parameters of the f_str call specify what values should fill the x's. f_str objects are used slightly differently between different layers. When declaring a format string within a count layer, f_str() expects to see the values n or distinct_n for event or distinct counts, pct or distinct_pct for event or distinct percentages, or total or distinct_total for denominator calculations. But in descriptive statistic layers, f_str parameters refer to the names of the summaries being performed, either by built-in defaults, or custom summaries declared using set_custom_summaries(). See set_format_strings() for some more notes about layer specific implementation.

An f_str() may also be used outside of a Tplyr table. The function apply_formats() allows you to apply an f_str within the context of dplyr::mutate() or more generally a vectorized function.

Valid f_str() Variables by Layer Type

Valid variables allowed within the ... parameter of f_str() differ by layer type.

• Count layers
  • n
  • pct
  • total
  • distinct_n
  • distinct_pct
  • distinct_total
• Shift layers
  • n
  • pct
  • total
• Desc layers
  • n
  • mean
  • sd
  • median
  • variance
  • min
  • max
  • iqr
  • q1
  • q3
  • missing
  • Custom summaries created by set_custom_summaries()

    Examples

    +
    
    +f_str("xx.x (xx.x)", mean, sd)
    +#> *** Format String ***
    +#> xx.x (xx.x)
    +#> *** vars, extracted formats, and settings ***
    +#> mean formated as: xx.x
    +#> 	integer length: 2
    +#> 	decimal length: 1
    +#> sd formated as: xx.x
    +#> 	integer length: 2
    +#> 	decimal length: 1
    +#> Total Format Size: 11
    +
    +f_str("a.a+1 (a.a+2)", mean, sd)
    +#> *** Format String ***
    +#> a.a+1 (a.a+2)
    +#> *** vars, extracted formats, and settings ***
    +#> mean formated as: a.a+1
    +#> 	integer length: 0
    +#> 	decimal length: 1
    +#> sd formated as: a.a+2
    +#> 	integer length: 0
    +#> 	decimal length: 2
    +#> Total Format Size: 13
    +
    +f_str("xx.a (xx.a+1)", mean, sd)
    +#> *** Format String ***
    +#> xx.a (xx.a+1)
    +#> *** vars, extracted formats, and settings ***
    +#> mean formated as: xx.a
    +#> 	integer length: 2
    +#> 	decimal length: 0
    +#> sd formated as: xx.a+1
    +#> 	integer length: 2
    +#> 	decimal length: 1
    +#> Total Format Size: 13
    +
    +f_str("xx.x, xx.x, xx.x", q1, median, q3)
    +#> *** Format String ***
    +#> xx.x, xx.x, xx.x
    +#> *** vars, extracted formats, and settings ***
    +#> q1 formated as: xx.x
    +#> 	integer length: 2
    +#> 	decimal length: 1
    +#> median formated as: xx.x
    +#> 	integer length: 2
    +#> 	decimal length: 1
    +#> q3 formated as: xx.x
    +#> 	integer length: 2
    +#> 	decimal length: 1
    +#> Total Format Size: 16
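+
+# Within a count layer (a hedged sketch rather than verified output), the same
+# interface uses count-based variables such as n and pct to fill the x's:
+library(magrittr)
+tplyr_table(mtcars, gear) %>%
+  add_layer(
+    group_count(cyl) %>%
+      set_format_strings(f_str("xx (xx.x%)", n, pct))
+  ) %>%
+  build()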
diff --git a/docs/reference/get_meta_result.html b/docs/reference/get_meta_result.html new file mode 100644 index 00000000..9780be7a --- /dev/null +++ b/docs/reference/get_meta_result.html @@ -0,0 +1,184 @@

Extract the result metadata of a Tplyr table — get_meta_result • Tplyr

Given a row_id value and a result column, this function will return the tplyr_meta object associated with that 'cell'.

Usage

get_meta_result(x, row_id, column, ...)

    Arguments

x

A built Tplyr table or a dataframe

row_id

The row_id value of the desired cell, provided as a character string

column

The result column of interest, provided as a character string

...

additional arguments

    Value

A tplyr_meta object

Details

If a Tplyr table is built with the metadata=TRUE option specified, then metadata is assembled behind the scenes to provide traceability on each result cell derived. The functions get_meta_result() and get_meta_subset() allow you to access that metadata by using an ID provided in the row_id column and the column name of the result you'd like to access. The purpose of the row_id variable, rather than a simple row index, is to provide a sort resistant reference to the originating row, so the output Tplyr table can be sorted in any order but the metadata are still easily accessible.

The tplyr_meta object provides a list with two elements - names and filters. The names element contains every column from the target data.frame of the Tplyr table that factored into the specified result cell, and the filters element contains all the necessary filters to subset the data summarized to create the specified result cell. get_meta_subset() additionally provides a parameter to specify any additional columns you would like to include in the returned subset data frame.

    Examples

    +
    t <- tplyr_table(mtcars, cyl) %>%
    +  add_layer(
    +    group_desc(hp)
    +  )
    +
    +dat <- t %>% build(metadata = TRUE)
    +
    +get_meta_result(t, 'd1_1', 'var1_4')
    +#> tplyr_meta: 2 names, 3 filters
    +#> Names:
    +#>     cyl, hp 
    +#> Filters:
    +#>     cyl == c("4"), TRUE, TRUE 
    +
    +m <- t$metadata
    +dat <- t$target
    +
+get_meta_result(m, 'd1_1', 'var1_4')
    +#> tplyr_meta: 2 names, 3 filters
    +#> Names:
    +#>     cyl, hp 
    +#> Filters:
    +#>     cyl == c("4"), TRUE, TRUE 
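+
+# A hedged sketch (output not shown): the quosures stored in the returned
+# tplyr_meta object can be spliced back into dplyr verbs to rebuild the
+# source subset by hand
+library(dplyr)
+meta <- get_meta_result(t, 'd1_1', 'var1_4')
+mtcars %>%
+  filter(!!!meta$filters) %>%
+  select(!!!meta$names)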
diff --git a/docs/reference/get_meta_subset.html b/docs/reference/get_meta_subset.html new file mode 100644 index 00000000..bbe625e0 --- /dev/null +++ b/docs/reference/get_meta_subset.html @@ -0,0 +1,223 @@

Extract the subset of data based on result metadata — get_meta_subset • Tplyr

Given a row_id value and a result column, this function will return the subset of data referenced by the tplyr_meta object associated with that 'cell', which provides traceability to tie a result to its source.

Usage
    get_meta_subset(x, row_id, column, add_cols = vars(USUBJID), ...)
    +
    +# S3 method for data.frame
    +get_meta_subset(
    +  x,
    +  row_id,
    +  column,
    +  add_cols = vars(USUBJID),
    +  target = NULL,
    +  ...
    +)
    +
    +# S3 method for tplyr_table
    +get_meta_subset(x, row_id, column, add_cols = vars(USUBJID), ...)
Arguments

x

A built Tplyr table or a dataframe

row_id

The row_id value of the desired cell, provided as a character string

column

The result column of interest, provided as a character string

add_cols

Additional columns to include in subset data.frame output

...

additional arguments

target

A data frame to be subset (if not pulled from a Tplyr table)

    Value

A data.frame

Details

If a Tplyr table is built with the metadata=TRUE option specified, then metadata is assembled behind the scenes to provide traceability on each result cell derived. The functions get_meta_result() and get_meta_subset() allow you to access that metadata by using an ID provided in the row_id column and the column name of the result you'd like to access. The purpose of the row_id variable, rather than a simple row index, is to provide a sort resistant reference to the originating row, so the output Tplyr table can be sorted in any order but the metadata are still easily accessible.

The tplyr_meta object provides a list with two elements - names and filters. The names element contains every column from the target data.frame of the Tplyr table that factored into the specified result cell, and the filters element contains all the necessary filters to subset the data summarized to create the specified result cell. get_meta_subset() additionally provides a parameter to specify any additional columns you would like to include in the returned subset data frame.

    Examples

    +
    t <- tplyr_table(mtcars, cyl) %>%
    +  add_layer(
    +    group_desc(hp)
    +  )
    +
    +
    +dat <- t %>% build(metadata = TRUE)
    +
    +get_meta_subset(t, 'd1_1', 'var1_4', add_cols = dplyr::vars(carb))
    +#>                carb cyl  hp
    +#> Datsun 710        1   4  93
    +#> Merc 240D         2   4  62
    +#> Merc 230          2   4  95
    +#> Fiat 128          1   4  66
    +#> Honda Civic       2   4  52
    +#> Toyota Corolla    1   4  65
    +#> Toyota Corona     1   4  97
    +#> Fiat X1-9         1   4  66
    +#> Porsche 914-2     2   4  91
    +#> Lotus Europa      2   4 113
    +#> Volvo 142E        2   4 109
    +
    +m <- t$metadata
    +dat <- t$target
    +
+get_meta_subset(m, 'd1_1', 'var1_4', add_cols = dplyr::vars(carb), target = dat)
    +#>                carb cyl  hp
    +#> Datsun 710        1   4  93
    +#> Merc 240D         2   4  62
    +#> Merc 230          2   4  95
    +#> Fiat 128          1   4  66
    +#> Honda Civic       2   4  52
    +#> Toyota Corolla    1   4  65
    +#> Toyota Corona     1   4  97
    +#> Fiat X1-9         1   4  66
    +#> Porsche 914-2     2   4  91
    +#> Lotus Europa      2   4 113
    +#> Volvo 142E        2   4 109
    +
diff --git a/docs/reference/get_metadata.html b/docs/reference/get_metadata.html new file mode 100644 index 00000000..1cdcf32a --- /dev/null +++ b/docs/reference/get_metadata.html @@ -0,0 +1,153 @@

Get the metadata dataframe from a tplyr_table — get_metadata • Tplyr

    Pull out the metadata dataframe from a tplyr_table to work with it directly

Usage

get_metadata(t)

Arguments

t

A Tplyr table with metadata built

Value

Tplyr metadata dataframe

Examples
    t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_desc(wt)
    +  )
    +
    +t %>%
    +  build(metadata=TRUE)
    +#> # A tibble: 6 × 7
    +#>   row_id row_label1 var1_3             var1_4 var1_5 ord_layer_index ord_layer_1
    +#>   <chr>  <chr>      <chr>              <chr>  <chr>            <int>       <int>
    +#> 1 d1_1   n          " 15"              " 12"  "  5"                1           1
    +#> 2 d2_1   Mean (SD)  "3.8926 (0.83299)" "2.61… "2.63…               1           2
    +#> 3 d3_1   Median     "3.7300"           "2.70… "2.77…               1           3
    +#> 4 d4_1   Q1, Q3     "3.4500, 3.9575"   "2.13… "2.14…               1           4
    +#> 5 d5_1   Min, Max   "2.465, 5.424"     "1.61… "1.51…               1           5
    +#> 6 d6_1   Missing    "  0"              "  0"  "  0"                1           6
    +
    +get_metadata(t)
    +#> # A tibble: 6 × 5
    +#>   row_id row_label1 var1_3     var1_4     var1_5    
    +#>   <chr>  <chr>      <list>     <list>     <list>    
    +#> 1 d1_1   n          <tplyr_mt> <tplyr_mt> <tplyr_mt>
    +#> 2 d2_1   Mean (SD)  <tplyr_mt> <tplyr_mt> <tplyr_mt>
    +#> 3 d3_1   Median     <tplyr_mt> <tplyr_mt> <tplyr_mt>
    +#> 4 d4_1   Q1, Q3     <tplyr_mt> <tplyr_mt> <tplyr_mt>
    +#> 5 d5_1   Min, Max   <tplyr_mt> <tplyr_mt> <tplyr_mt>
    +#> 6 d6_1   Missing    <tplyr_mt> <tplyr_mt> <tplyr_mt>
diff --git a/docs/reference/get_numeric_data.html b/docs/reference/get_numeric_data.html index 87508d66..8e837825 100644 --- a/docs/reference/get_numeric_data.html +++ b/docs/reference/get_numeric_data.html @@ -1,224 +1,124 @@

Retrieve the numeric data from a tplyr objects — get_numeric_data • Tplyr

    Examples

    +
    # Load in pipe
    +library(magrittr)
    +
    +t <- tplyr_table(mtcars, gear) %>%
    + add_layer(name='drat',
    +           group_desc(drat)
    + ) %>%
    + add_layer(name='cyl',
    +           group_count(cyl)
    + )
    +
    + # Return a list of the numeric data frames
    + get_numeric_data(t)
    +#> $drat
    +#> # A tibble: 27 × 4
    +#>    summary_var gear  stat     value
    +#>    <chr>       <fct> <chr>    <dbl>
    +#>  1 drat        3     n       15    
    +#>  2 drat        3     mean     3.13 
    +#>  3 drat        3     sd       0.274
    +#>  4 drat        3     median   3.08 
    +#>  5 drat        3     q1       3.04 
    +#>  6 drat        3     q3       3.18 
    +#>  7 drat        3     min      2.76 
    +#>  8 drat        3     max      3.73 
    +#>  9 drat        3     missing  0    
    +#> 10 drat        4     n       12    
    +#> # … with 17 more rows
    +#> 
    +#> $cyl
    +#> # A tibble: 9 × 6
    +#>   gear  summary_var     n distinct_n distinct_total total
    +#>   <chr> <chr>       <dbl>      <dbl>          <int> <int>
    +#> 1 3     4               1          1              3    15
    +#> 2 3     6               2          1              3    15
    +#> 3 3     8              12          1              3    15
    +#> 4 4     4               8          1              2    12
    +#> 5 4     6               4          1              2    12
    +#> 6 4     8               0          0              2    12
    +#> 7 5     4               2          1              3     5
    +#> 8 5     6               1          1              3     5
    +#> 9 5     8               2          1              3     5
    +#> 
    +
    + # Get the data from a specific layer
    + get_numeric_data(t, layer='drat')
    +#> # A tibble: 27 × 4
    +#>    summary_var gear  stat     value
    +#>    <chr>       <fct> <chr>    <dbl>
    +#>  1 drat        3     n       15    
    +#>  2 drat        3     mean     3.13 
    +#>  3 drat        3     sd       0.274
    +#>  4 drat        3     median   3.08 
    +#>  5 drat        3     q1       3.04 
    +#>  6 drat        3     q3       3.18 
    +#>  7 drat        3     min      2.76 
    +#>  8 drat        3     max      3.73 
    +#>  9 drat        3     missing  0    
    +#> 10 drat        4     n       12    
    +#> # … with 17 more rows
    + get_numeric_data(t, layer=1)
    +#> # A tibble: 27 × 4
    +#>    summary_var gear  stat     value
    +#>    <chr>       <fct> <chr>    <dbl>
    +#>  1 drat        3     n       15    
    +#>  2 drat        3     mean     3.13 
    +#>  3 drat        3     sd       0.274
    +#>  4 drat        3     median   3.08 
    +#>  5 drat        3     q1       3.04 
    +#>  6 drat        3     q3       3.18 
    +#>  7 drat        3     min      2.76 
    +#>  8 drat        3     max      3.73 
    +#>  9 drat        3     missing  0    
    +#> 10 drat        4     n       12    
    +#> # … with 17 more rows
    +
    + # Choose multiple layers by name or index
    + get_numeric_data(t, layer=c('cyl', 'drat'))
    +#> $cyl
    +#> # A tibble: 9 × 6
    +#>   gear  summary_var     n distinct_n distinct_total total
    +#>   <chr> <chr>       <dbl>      <dbl>          <int> <int>
    +#> 1 3     4               1          1              3    15
    +#> 2 3     6               2          1              3    15
    +#> 3 3     8              12          1              3    15
    +#> 4 4     4               8          1              2    12
    +#> 5 4     6               4          1              2    12
    +#> 6 4     8               0          0              2    12
    +#> 7 5     4               2          1              3     5
    +#> 8 5     6               1          1              3     5
    +#> 9 5     8               2          1              3     5
    +#> 
    +#> $drat
    +#> # A tibble: 27 × 4
    +#>    summary_var gear  stat     value
    +#>    <chr>       <fct> <chr>    <dbl>
    +#>  1 drat        3     n       15    
    +#>  2 drat        3     mean     3.13 
    +#>  3 drat        3     sd       0.274
    +#>  4 drat        3     median   3.08 
    +#>  5 drat        3     q1       3.04 
    +#>  6 drat        3     q3       3.18 
    +#>  7 drat        3     min      2.76 
    +#>  8 drat        3     max      3.73 
    +#>  9 drat        3     missing  0    
    +#> 10 drat        4     n       12    
    +#> # … with 17 more rows
    +#> 
    + get_numeric_data(t, layer=c(2, 1))
    +#> $cyl
    +#> # A tibble: 9 × 6
    +#>   gear  summary_var     n distinct_n distinct_total total
    +#>   <chr> <chr>       <dbl>      <dbl>          <int> <int>
    +#> 1 3     4               1          1              3    15
    +#> 2 3     6               2          1              3    15
    +#> 3 3     8              12          1              3    15
    +#> 4 4     4               8          1              2    12
    +#> 5 4     6               4          1              2    12
    +#> 6 4     8               0          0              2    12
    +#> 7 5     4               2          1              3     5
    +#> 8 5     6               1          1              3     5
    +#> 9 5     8               2          1              3     5
    +#> 
    +#> $drat
    +#> # A tibble: 27 × 4
    +#>    summary_var gear  stat     value
    +#>    <chr>       <fct> <chr>    <dbl>
    +#>  1 drat        3     n       15    
    +#>  2 drat        3     mean     3.13 
    +#>  3 drat        3     sd       0.274
    +#>  4 drat        3     median   3.08 
    +#>  5 drat        3     q1       3.04 
    +#>  6 drat        3     q3       3.18 
    +#>  7 drat        3     min      2.76 
    +#>  8 drat        3     max      3.73 
    +#>  9 drat        3     missing  0    
    +#> 10 drat        4     n       12    
    +#> # … with 17 more rows
    +#> 
    +
    + # Get the data and filter it
    + get_numeric_data(t, layer='drat', where = gear==3)
    +#> # A tibble: 9 × 4
    +#>   summary_var gear  stat     value
    +#>   <chr>       <fct> <chr>    <dbl>
    +#> 1 drat        3     n       15    
    +#> 2 drat        3     mean     3.13 
    +#> 3 drat        3     sd       0.274
    +#> 4 drat        3     median   3.08 
    +#> 5 drat        3     q1       3.04 
    +#> 6 drat        3     q3       3.18 
    +#> 7 drat        3     min      2.76 
    +#> 8 drat        3     max      3.73 
    +#> 9 drat        3     missing  0    
    +
diff --git a/docs/reference/get_stats_data.html b/docs/reference/get_stats_data.html index 84df0870..bc453b6a 100644 --- a/docs/reference/get_stats_data.html +++ b/docs/reference/get_stats_data.html @@ -1,228 +1,128 @@

Get statistics data — get_stats_data • Tplyr

    Examples

    +
    library(magrittr)
    +
    +t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(name='drat',
    +            group_desc(drat)
    +  ) %>%
    +  add_layer(name="cyl",
    +            group_count(cyl)
    +  ) %>%
    +  add_layer(name="am",
    +            group_count(am) %>%
    +              add_risk_diff(c('4', '3'))
    +  ) %>%
    +  add_layer(name="carb",
    +            group_count(carb) %>%
    +              add_risk_diff(c('4', '3'))
    +  )
    +
    + # Returns a list of lists, containing stats data from each layer
    + get_stats_data(t)
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> Warning: Chi-squared approximation may be incorrect
    +#> $drat
    +#> list()
    +#> 
    +#> $cyl
    +#> list()
    +#> 
    +#> $am
    +#> $am$riskdiff
    +#> # A tibble: 10 × 3
    +#>    summary_var measure  `4_3`
    +#>    <chr>       <chr>    <dbl>
    +#>  1 0           comp     0.333
    +#>  2 0           ref      1    
    +#>  3 0           dif     -0.667
    +#>  4 0           low     -1    
    +#>  5 0           high    -0.325
    +#>  6 1           comp     0.667
    +#>  7 1           ref      0    
    +#>  8 1           dif      0.667
    +#>  9 1           low      0.325
    +#> 10 1           high     1    
    +#> 
    +#> 
    +#> $carb
    +#> $carb$riskdiff
    +#> # A tibble: 30 × 3
    +#>    summary_var measure   `4_3`
    +#>    <chr>       <chr>     <dbl>
    +#>  1 1           comp     0.333 
    +#>  2 1           ref      0.2   
    +#>  3 1           dif      0.133 
    +#>  4 1           low     -0.277 
    +#>  5 1           high     0.543 
    +#>  6 2           comp     0.333 
    +#>  7 2           ref      0.267 
    +#>  8 2           dif      0.0667
    +#>  9 2           low     -0.348 
    +#> 10 2           high     0.481 
    +#> # … with 20 more rows
    +#> 
    +#> 
    +
    + # Returns just the riskdiff statistics from each layer - NULL
    + # for layers without riskdiff
    + get_stats_data(t, statistic="riskdiff")
    +#> $drat
    +#> NULL
    +#> 
    +#> $cyl
    +#> NULL
    +#> 
    +#> $am
    +#> # A tibble: 10 × 3
    +#>    summary_var measure  `4_3`
    +#>    <chr>       <chr>    <dbl>
    +#>  1 0           comp     0.333
    +#>  2 0           ref      1    
    +#>  3 0           dif     -0.667
    +#>  4 0           low     -1    
    +#>  5 0           high    -0.325
    +#>  6 1           comp     0.667
    +#>  7 1           ref      0    
    +#>  8 1           dif      0.667
    +#>  9 1           low      0.325
    +#> 10 1           high     1    
    +#> 
    +#> $carb
    +#> # A tibble: 30 × 3
    +#>    summary_var measure   `4_3`
    +#>    <chr>       <chr>     <dbl>
    +#>  1 1           comp     0.333 
    +#>  2 1           ref      0.2   
    +#>  3 1           dif      0.133 
    +#>  4 1           low     -0.277 
    +#>  5 1           high     0.543 
    +#>  6 2           comp     0.333 
    +#>  7 2           ref      0.267 
    +#>  8 2           dif      0.0667
    +#>  9 2           low     -0.348 
    +#> 10 2           high     0.481 
    +#> # … with 20 more rows
    +#> 
    +
    + # Return the statistic data for just the "am" layer - a list
    + get_stats_data(t, layer="am")
    +#> $riskdiff
    +#> # A tibble: 10 × 3
    +#>    summary_var measure  `4_3`
    +#>    <chr>       <chr>    <dbl>
    +#>  1 0           comp     0.333
    +#>  2 0           ref      1    
    +#>  3 0           dif     -0.667
    +#>  4 0           low     -1    
    +#>  5 0           high    -0.325
    +#>  6 1           comp     0.667
    +#>  7 1           ref      0    
    +#>  8 1           dif      0.667
    +#>  9 1           low      0.325
    +#> 10 1           high     1    
    +#> 
    + get_stats_data(t, layer=3)
    +#> $riskdiff
    +#> # A tibble: 10 × 3
    +#>    summary_var measure  `4_3`
    +#>    <chr>       <chr>    <dbl>
    +#>  1 0           comp     0.333
    +#>  2 0           ref      1    
    +#>  3 0           dif     -0.667
    +#>  4 0           low     -1    
    +#>  5 0           high    -0.325
    +#>  6 1           comp     0.667
    +#>  7 1           ref      0    
    +#>  8 1           dif      0.667
    +#>  9 1           low      0.325
    +#> 10 1           high     1    
    +#> 
    +
    + # Return the statistic data for just the "am" and "cyl", layer - a
    + # list of lists
    + get_stats_data(t, layer=c("am", "cyl"))
    +#> $am
    +#> $am$riskdiff
    +#> # A tibble: 10 × 3
    +#>    summary_var measure  `4_3`
    +#>    <chr>       <chr>    <dbl>
    +#>  1 0           comp     0.333
    +#>  2 0           ref      1    
    +#>  3 0           dif     -0.667
    +#>  4 0           low     -1    
    +#>  5 0           high    -0.325
    +#>  6 1           comp     0.667
    +#>  7 1           ref      0    
    +#>  8 1           dif      0.667
    +#>  9 1           low      0.325
    +#> 10 1           high     1    
    +#> 
    +#> 
    +#> $cyl
    +#> list()
    +#> 
    + get_stats_data(t, layer=c(3, 2))
    +#> $am
    +#> $am$riskdiff
    +#> # A tibble: 10 × 3
    +#>    summary_var measure  `4_3`
    +#>    <chr>       <chr>    <dbl>
    +#>  1 0           comp     0.333
    +#>  2 0           ref      1    
    +#>  3 0           dif     -0.667
    +#>  4 0           low     -1    
    +#>  5 0           high    -0.325
    +#>  6 1           comp     0.667
    +#>  7 1           ref      0    
    +#>  8 1           dif      0.667
    +#>  9 1           low      0.325
    +#> 10 1           high     1    
    +#> 
    +#> 
    +#> $cyl
    +#> list()
    +#> 
    +
    + # Return just the statistic data for "am" and "cyl" - a list
    + get_stats_data(t, layer=c("am", "cyl"), statistic="riskdiff")
    +#> $am
    +#> # A tibble: 10 × 3
    +#>    summary_var measure  `4_3`
    +#>    <chr>       <chr>    <dbl>
    +#>  1 0           comp     0.333
    +#>  2 0           ref      1    
    +#>  3 0           dif     -0.667
    +#>  4 0           low     -1    
    +#>  5 0           high    -0.325
    +#>  6 1           comp     0.667
    +#>  7 1           ref      0    
    +#>  8 1           dif      0.667
    +#>  9 1           low      0.325
    +#> 10 1           high     1    
    +#> 
    +#> $cyl
    +#> NULL
    +#> 
    + get_stats_data(t, layer=c(3, 2), statistic="riskdiff")
    +#> $am
    +#> # A tibble: 10 × 3
    +#>    summary_var measure  `4_3`
    +#>    <chr>       <chr>    <dbl>
    +#>  1 0           comp     0.333
    +#>  2 0           ref      1    
    +#>  3 0           dif     -0.667
    +#>  4 0           low     -1    
    +#>  5 0           high    -0.325
    +#>  6 1           comp     0.667
    +#>  7 1           ref      0    
    +#>  8 1           dif      0.667
    +#>  9 1           low      0.325
    +#> 10 1           high     1    
    +#> 
    +#> $cyl
    +#> NULL
    +#> 
    +
    +
    + # Return the riskdiff for the "am" layer - a data frame
    + get_stats_data(t, layer="am", statistic="riskdiff")
    +#> # A tibble: 10 × 3
    +#>    summary_var measure  `4_3`
    +#>    <chr>       <chr>    <dbl>
    +#>  1 0           comp     0.333
    +#>  2 0           ref      1    
    +#>  3 0           dif     -0.667
    +#>  4 0           low     -1    
    +#>  5 0           high    -0.325
    +#>  6 1           comp     0.667
    +#>  7 1           ref      0    
    +#>  8 1           dif      0.667
    +#>  9 1           low      0.325
    +#> 10 1           high     1    
    +
    + # Return and filter the riskdiff for the am layer - a data frame
    + get_stats_data(t, layer="am", statistic="riskdiff", where = summary_var==1)
    +#> # A tibble: 5 × 3
    +#>   summary_var measure `4_3`
    +#>   <chr>       <chr>   <dbl>
    +#> 1 1           comp    0.667
    +#> 2 1           ref     0    
    +#> 3 1           dif     0.667
    +#> 4 1           low     0.325
    +#> 5 1           high    1    
diff --git a/docs/reference/header_n.html b/docs/reference/header_n.html index 852a7431..b44c0df0 100644 --- a/docs/reference/header_n.html +++ b/docs/reference/header_n.html @@ -1,230 +1,131 @@

Return or set header_n binding — header_n • Tplyr

    Examples

    +
    tab <- tplyr_table(mtcars, gear)
    +
    +header_n(tab) <- data.frame(
    +  gear = c(3, 4, 5),
    +  n = c(10, 15, 45)
    +)
diff --git a/docs/reference/index.html b/docs/reference/index.html index 7d3e2a2c..8273fad3 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,567 +1,466 @@

Function reference • Tplyr

Build
High-level functions to create and build a table

build()
    Trigger the execution of the tplyr_table
tplyr_table()
    Create a Tplyr table object
pop_data() `pop_data<-`() set_pop_data()
    Return or set population data bindings
pop_treat_var() set_pop_treat_var()
    Return or set pop_treat_var binding

Layers and layering
Creating layers and adding them to the table

add_layer() add_layers()
    Attach a layer to a tplyr_table object
tplyr_layer()
    Create a tplyr_layer object
group_count() group_desc() group_shift()
    Create a count, desc, or shift layer for discrete count based summaries, descriptive statistics summaries, or shift count summaries

Formatting
Customizing the display as a table

f_str()
    Create a f_str object
get_desc_layer_formats() set_desc_layer_formats() get_count_layer_formats() set_count_layer_formats() get_shift_layer_formats() set_shift_layer_formats()
    Get or set the default format strings for descriptive statistics layers
set_format_strings()
    Set the format strings and associated summaries to be performed in a layer
set_missing_count()
    Set the display for missing strings

Sorting
Sorting and customizing table order

set_order_count_method() set_ordering_cols() set_result_order_var()
    Set the ordering logic for the count layer
set_outer_sort_position()
    Set the value of a outer nested count layer to Inf or -Inf

Adding Groups and Stats

add_total_row()
    Add a Total row into a count summary.
add_treat_grps() add_total_group() treat_grps()
    Combine existing treatment groups for summary
add_risk_diff()
    Add risk difference to a count layer
set_total_row_label()
    Set the label for the total row

Summary Functions

set_custom_summaries()
    Set custom summaries to be performed within a descriptive statistics layer
get_precision_by() set_precision_by()
    Set or return precision_by layer binding
get_precision_on() set_precision_on()
    Set or return precision_on layer binding

Counting functions

set_denoms_by()
    Set variables used in pct denominator calculation
set_distinct_by()
    Set counts to be distinct by some grouping variable.
set_denom_where()
    Set Logic for denominator subsetting
set_nest_count()
    Set the option to nest count layers
set_outer_sort_position()
    Set the value of a outer nested count layer to Inf or -Inf
set_missing_count()
    Set the display for missing strings
keep_levels()
    Select levels to keep in a count layer
set_denom_ignore()
    Set values the denominator calculation will ignore
set_indentation()
    Set the option to prefix the row_labels in the inner count_layer

Column Headers

add_column_headers()
    Attach column headers to a Tplyr output
header_n() `header_n<-`() set_header_n()
    Return or set header_n binding

Helper functions

apply_row_masks()
    Replace repeating row label variables with blanks in preparation for display.
get_numeric_data()
    Retrieve the numeric data from a tplyr objects
get_stats_data()
    Get statistics data
get_by() set_by()
    Set or return by layer binding
get_target_var() set_target_var()
    Set or return treat_var binding
treat_var() set_treat_var()
    Return or set the treatment variable binding
get_where() set_where() set_pop_where() get_pop_where()
    Set or return where binding for layer or table

diff --git a/docs/reference/keep_levels.html b/docs/reference/keep_levels.html index bd60eb48..a6b3e91c 100644 --- a/docs/reference/keep_levels.html +++ b/docs/reference/keep_levels.html @@ -1,193 +1,92 @@

Select levels to keep in a count layer — keep_levels • Tplyr
    +

    Examples

    +
    library(dplyr)
    +mtcars <- mtcars %>%
    +  mutate_all(as.character)
    +
    +t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      keep_levels("4", "8") %>%
    +      set_denom_where(cyl %in% c("4", "8"))
    + ) %>%
    + build()
diff --git a/docs/reference/layer_attachment.html b/docs/reference/layer_attachment.html index daff8725..1c335dff 100644 --- a/docs/reference/layer_attachment.html +++ b/docs/reference/layer_attachment.html @@ -1,57 +1,9 @@

Attach a layer to a tplyr_table object — add_layer • Tplyr
    +

    Examples

    +
    # Load in pipe
    +library(magrittr)
    +
    +## Single layer
    +t <- tplyr_table(mtcars, cyl) %>%
    +  add_layer(
    +    group_desc(target_var=mpg)
    +  )
    +
    +## Single layer with name
    +t <- tplyr_table(mtcars, cyl) %>%
    +  add_layer(name='mpg',
    +    group_desc(target_var=mpg)
    +  )
    +
    +# Using add_layers
    +t <- tplyr_table(mtcars, cyl)
    +l1 <- group_desc(t, target_var=mpg)
    +l2 <- group_count(t, target_var=cyl)
    +
    +t <- add_layers(t, l1, 'cyl' = l2)
    +
diff --git a/docs/reference/layer_constructors.html b/docs/reference/layer_constructors.html index 74a4ab57..b286a70f 100644 --- a/docs/reference/layer_constructors.html +++ b/docs/reference/layer_constructors.html @@ -1,196 +1,95 @@

Create a count, desc, or shift layer for discrete count based summaries, descriptive statistics summaries, or shift count summaries — group_count • Tplyr
    +

    Examples

    +
    # Load in pipe
    +library(magrittr)
    +
    +t <- tplyr_table(iris, Species) %>%
    +  add_layer(
    +    group_desc(target_var=Sepal.Width)
    +  )
    +
    +t <- tplyr_table(mtcars, am) %>%
    +  add_layer(
    +    group_shift(vars(row=gear, column=carb), by=cyl)
    +  )
    +
diff --git a/docs/reference/ordering.html b/docs/reference/ordering.html index f2b4993c..5e9120af 100644 --- a/docs/reference/ordering.html +++ b/docs/reference/ordering.html @@ -1,53 +1,5 @@

Set the ordering logic for the count layer — set_order_count_method • Tplyr
    +

    Examples

    +
    library(dplyr)
    +
    +# Default sorting by factor
    +t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl)
    +  )
    +build(t)
    +#> # A tibble: 3 × 6
    +#>   row_label1 var1_3        var1_4        var1_5      ord_layer_index ord_layer_1
    +#>   <chr>      <chr>         <chr>         <chr>                 <int>       <dbl>
    +#> 1 4          " 1 (  6.7%)" " 8 ( 66.7%)" " 2 ( 40.0…               1           1
    +#> 2 6          " 2 ( 13.3%)" " 4 ( 33.3%)" " 1 ( 20.0…               1           2
    +#> 3 8          "12 ( 80.0%)" " 0 (  0.0%)" " 2 ( 40.0…               1           3
    +
    +# Sorting by <VAR>N
    +mtcars$cylN <- mtcars$cyl
    +t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      set_order_count_method("byvarn")
    +  )
    +
    +# Sorting by row count
    +t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      set_order_count_method("bycount") %>%
    +      # Orders based on the 6 gear group
    +      set_ordering_cols(6)
    +  )
    +
    +# Sorting by row count by percentages
    +t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      set_order_count_method("bycount") %>%
    +      set_result_order_var(pct)
    +  )
    +
    +# Sorting when you have column arguments in the table
    +t <- tplyr_table(mtcars, gear, cols = vs) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      # Uses the fourth gear group and the 0 vs group in ordering
    +      set_ordering_cols(4, 0)
    +  )
    +
    +# Using a custom factor to order
    +mtcars$cyl <- factor(mtcars$cyl, c(6, 4, 8))
    +t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      # This is the default but can be used to change the setting if it is
+      # set at the table level.
    +      set_order_count_method("byfactor")
    +  )
    +
diff --git a/docs/reference/pipe.html b/docs/reference/pipe.html new file mode 100644 index 00000000..3e4f4c30 --- /dev/null +++ b/docs/reference/pipe.html @@ -0,0 +1,126 @@

Pipe operator — %>% • Tplyr

    See magrittr::%>% for details.

Usage

lhs %>% rhs

Arguments

lhs

A value or the magrittr placeholder.

rhs

A function call using the magrittr semantics.

Value

The result of calling `rhs(lhs)`.

diff --git a/docs/reference/pop_data.html b/docs/reference/pop_data.html index cb6f5352..e51d1f93 100644 --- a/docs/reference/pop_data.html +++ b/docs/reference/pop_data.html @@ -1,193 +1,92 @@

Return or set population data bindings — pop_data • Tplyr

    Value

For tplyr_pop_data, the pop_data binding of the tplyr_table object. For tplyr_pop_data<-, nothing is returned; the pop_data binding is set silently. For set_tplyr_pop_data, the modified object.

Examples

    Examples

    +
    tab <- tplyr_table(iris, Species)
    +
    +pop_data(tab) <- mtcars
    +
diff --git a/docs/reference/pop_treat_var.html b/docs/reference/pop_treat_var.html index 0a44ea82..038e90b9 100644 --- a/docs/reference/pop_treat_var.html +++ b/docs/reference/pop_treat_var.html @@ -1,269 +1,165 @@

Return or set pop_treat_var binding — pop_treat_var • Tplyr

    Examples

    +
    tab <- tplyr_table(iris, Species)
    +
    +pop_data(tab) <- mtcars
    +set_pop_treat_var(tab, mpg)
    +#> *** tplyr_table ***
    +#> Target (data.frame):
    +#> 	Name:  iris
    +#> 	Rows:  150
    +#> 	Columns:  5 
    +#> pop_data (data.frame)
    +#> 	Name:  value 
    +#> 	Rows:  32 
    +#> 	Columns:  11 
    +#> treat_var variable (quosure)
    +#> 	Speciespop_treat_var variable (quosure)
    +#> 	mpg
    +#> 
    +#> header_n:  header groups
    +#> treat_grps groupings (list)
    +#> Table Columns (cols):
    +#> where: TRUE
    +#> Number of layer(s): 0
    +#> layer_output: 0
    +
diff --git a/docs/reference/precision_by.html b/docs/reference/precision_by.html index 6a4a1977..6a1b552a 100644 --- a/docs/reference/precision_by.html +++ b/docs/reference/precision_by.html @@ -1,193 +1,92 @@

Set or return precision_by layer binding — get_precision_by • Tplyr
    +

    Examples

    +
    # Load in pipe
    +library(magrittr)
    +lay <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_desc(mpg, by=vars(carb, am)) %>%
    +    set_precision_by(carb)
    +  )
    +
diff --git a/docs/reference/precision_on.html b/docs/reference/precision_on.html index 1abbafb8..7b2fc3c6 100644 --- a/docs/reference/precision_on.html +++ b/docs/reference/precision_on.html @@ -1,253 +1,147 @@

Set or return precision_on layer binding — get_precision_on • Tplyr
    +

    Examples

    +
    # Load in pipe
    +library(magrittr)
    +lay <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_desc(vars(mpg, disp), by=vars(carb, am)) %>%
    +    set_precision_on(disp)
    +  )
diff --git a/docs/reference/process_formatting.html b/docs/reference/process_formatting.html index c83c2443..7e431852 100644 --- a/docs/reference/process_formatting.html +++ b/docs/reference/process_formatting.html @@ -1,236 +1,126 @@

Process layers to get formatted and pivoted tables. — process_formatting • Tplyr

    Value

The formatted_table object that is bound to the layer

diff --git a/docs/reference/process_statistic_data.html b/docs/reference/process_statistic_data.html index d94a1177..f67ed83d 100644 --- a/docs/reference/process_statistic_data.html +++ b/docs/reference/process_statistic_data.html @@ -1,238 +1,129 @@

Process a tplyr_statistic object — process_statistic_data • Tplyr

    Value

Numeric statistic data from a tplyr statistic

diff --git a/docs/reference/process_statistic_formatting.html b/docs/reference/process_statistic_formatting.html index 0a9d0de4..a4a05a14 100644 --- a/docs/reference/process_statistic_formatting.html +++ b/docs/reference/process_statistic_formatting.html @@ -1,238 +1,129 @@

Process string formatting on a tplyr_statistic object — process_statistic_formatting • Tplyr

    Value

Formatted tplyr_statistic data

diff --git a/docs/reference/process_summaries.html b/docs/reference/process_summaries.html index 1e71ca34..818b7107 100644 --- a/docs/reference/process_summaries.html +++ b/docs/reference/process_summaries.html @@ -1,236 +1,126 @@

Process layers to get numeric results of layer — process_summaries • Tplyr

    Value

The tplyr_layer object with a 'built_table' binding

diff --git a/docs/reference/set_custom_summaries.html b/docs/reference/set_custom_summaries.html index 71c4a573..1f895ab7 100644 --- a/docs/reference/set_custom_summaries.html +++ b/docs/reference/set_custom_summaries.html @@ -1,221 +1,123 @@

Set custom summaries to be performed within a descriptive statistics layer — set_custom_summaries • Tplyr

    Examples

    +
    #Load in pipe
    +library(magrittr)
    +
    +tplyr_table(iris, Species) %>%
    +  add_layer(
    +    group_desc(Sepal.Length, by = "Sepal Length") %>%
    +      set_custom_summaries(
    +        geometric_mean = exp(sum(log(.var[.var > 0]),
    +                                     na.rm=TRUE) / length(.var))
    +      ) %>%
    +      set_format_strings(
    +        'Geometric Mean' = f_str('xx.xx', geometric_mean)
    +      )
    +  ) %>%
    +  build()
    +#> # A tibble: 1 × 8
    +#>   row_label1   row_label2     var1_setosa var1_versicolor var1_virginica
    +#>   <chr>        <chr>          <chr>       <chr>           <chr>         
    +#> 1 Sepal Length Geometric Mean " 4.99"     " 5.91"         " 6.56"       
    +#> # … with 3 more variables: ord_layer_index <int>, ord_layer_1 <int>,
    +#> #   ord_layer_2 <int>
diff --git a/docs/reference/set_denom_ignore.html b/docs/reference/set_denom_ignore.html index c4205c43..c74f52b8 100644 --- a/docs/reference/set_denom_ignore.html +++ b/docs/reference/set_denom_ignore.html @@ -1,193 +1,92 @@

Set values the denominator calculation will ignore — set_denom_ignore • Tplyr

Value

    The modified layer object

    +
    + +
    +

    Examples

    +
    library(magrittr)
    +mtcars2 <- mtcars
    +mtcars2[mtcars$cyl == 6, "cyl"] <- NA
    +mtcars2[mtcars$cyl == 8, "cyl"] <- "Not Found"
    +
    +tplyr_table(mtcars2, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      set_missing_count(f_str("xx ", n), Missing = c(NA, "Not Found"))
    +      # This function is currently deprecated. It was replaced with an
    +      # argument in set_missing_count
    +      # set_denom_ignore("Missing")
    +  ) %>%
    +  build()
    +#> # A tibble: 2 × 6
    +#>   row_label1 var1_3        var1_4        var1_5      ord_layer_index ord_layer_1
    +#>   <chr>      <chr>         <chr>         <chr>                 <int>       <dbl>
    +#> 1 4          " 1 (  6.7%)" " 8 ( 66.7%)" " 2 ( 40.0…               1           1
    +#> 2 Missing    "14 "         " 4 "         " 3 "                     1           3
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/set_denom_where.html b/docs/reference/set_denom_where.html index ef2473a8..d47fc3f2 100644 --- a/docs/reference/set_denom_where.html +++ b/docs/reference/set_denom_where.html @@ -1,257 +1,152 @@ - - - - - - - -Set Logic for denominator subsetting — set_denom_where • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Skip to contents + +
    -
    - +
    +

    Examples

    +
    library(magrittr)
    +t10 <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl, where = cyl != 6) %>%
    +    set_denom_where(TRUE)
    +    # The denominators will be based on all of the values, including 6
    +  ) %>%
    + build()
    +
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/set_denoms_by.html b/docs/reference/set_denoms_by.html index 9edad3dc..6eb990b8 100644 --- a/docs/reference/set_denoms_by.html +++ b/docs/reference/set_denoms_by.html @@ -1,312 +1,210 @@ - - - - - - - -Set variables used in pct denominator calculation — set_denoms_by • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Skip to contents + +
    -
    - +

    The modified layer object

    +
    + +
    +

    Examples

    +
    library(magrittr)
    +
    +# Default has matrix of treatment group, additional columns,
    +# and by variables sum to 1
    +tplyr_table(mtcars, am) %>%
    +  add_layer(
    +    group_shift(vars(row=gear, column=carb), by=cyl) %>%
    +      set_format_strings(f_str("xxx (xx.xx%)", n, pct))
    +  ) %>%
    +  build()
    +#> # A tibble: 9 × 17
    +#>   row_label1 row_label2 var1_0_1    var1_0_2 var1_0_3 var1_0_4 var1_0_6 var1_0_8
    +#>   <chr>      <chr>      <chr>       <chr>    <chr>    <chr>    <chr>    <chr>   
    +#> 1 4          3          "  1 (33.3… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 2 4          4          "  0 ( 0.0… "  2 (6… "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 3 4          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 4 6          3          "  2 (50.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 5 6          4          "  0 ( 0.0… "  0 ( … "  0 ( … "  2 (5… "  0 ( … "  0 ( …
    +#> 6 6          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 7 8          3          "  0 ( 0.0… "  4 (3… "  3 (2… "  5 (4… "  0 ( … "  0 ( …
    +#> 8 8          4          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 9 8          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> # … with 9 more variables: var1_1_1 <chr>, var1_1_2 <chr>, var1_1_3 <chr>,
    +#> #   var1_1_4 <chr>, var1_1_6 <chr>, var1_1_8 <chr>, ord_layer_index <int>,
    +#> #   ord_layer_1 <dbl>, ord_layer_2 <dbl>
    +
    +tplyr_table(mtcars, am) %>%
    +  add_layer(
    +    group_shift(vars(row=gear, column=carb), by=cyl) %>%
    +      set_format_strings(f_str("xxx (xx.xx%)", n, pct)) %>%
    +      set_denoms_by(cyl, gear) # Row % sums to 1
    +  ) %>%
    +  build()
    +#> # A tibble: 9 × 17
    +#>   row_label1 row_label2 var1_0_1    var1_0_2 var1_0_3 var1_0_4 var1_0_6 var1_0_8
    +#>   <chr>      <chr>      <chr>       <chr>    <chr>    <chr>    <chr>    <chr>   
    +#> 1 4          3          "  1 (100.… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 2 4          4          "  0 ( 0.0… "  2 (2… "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 3 4          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 4 6          3          "  2 (100.… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 5 6          4          "  0 ( 0.0… "  0 ( … "  0 ( … "  2 (5… "  0 ( … "  0 ( …
    +#> 6 6          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 7 8          3          "  0 ( 0.0… "  4 (3… "  3 (2… "  5 (4… "  0 ( … "  0 ( …
    +#> 8 8          4          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 9 8          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> # … with 9 more variables: var1_1_1 <chr>, var1_1_2 <chr>, var1_1_3 <chr>,
    +#> #   var1_1_4 <chr>, var1_1_6 <chr>, var1_1_8 <chr>, ord_layer_index <int>,
    +#> #   ord_layer_1 <dbl>, ord_layer_2 <dbl>
    +
    +tplyr_table(mtcars, am) %>%
    +  add_layer(
    +    group_shift(vars(row=gear, column=carb), by=cyl) %>%
    +      set_format_strings(f_str("xxx (xx.xx%)", n, pct)) %>%
    +      set_denoms_by(cyl, gear, am) # % within treatment group sums to 1
    +  ) %>%
    +  build()
    +#> # A tibble: 9 × 17
    +#>   row_label1 row_label2 var1_0_1    var1_0_2 var1_0_3 var1_0_4 var1_0_6 var1_0_8
    +#>   <chr>      <chr>      <chr>       <chr>    <chr>    <chr>    <chr>    <chr>   
    +#> 1 4          3          "  1 (100.… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 2 4          4          "  0 ( 0.0… "  2 (1… "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 3 4          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 4 6          3          "  2 (100.… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 5 6          4          "  0 ( 0.0… "  0 ( … "  0 ( … "  2 (1… "  0 ( … "  0 ( …
    +#> 6 6          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 7 8          3          "  0 ( 0.0… "  4 (3… "  3 (2… "  5 (4… "  0 ( … "  0 ( …
    +#> 8 8          4          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 9 8          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> # … with 9 more variables: var1_1_1 <chr>, var1_1_2 <chr>, var1_1_3 <chr>,
    +#> #   var1_1_4 <chr>, var1_1_6 <chr>, var1_1_8 <chr>, ord_layer_index <int>,
    +#> #   ord_layer_1 <dbl>, ord_layer_2 <dbl>
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/set_distinct_by.html b/docs/reference/set_distinct_by.html index eef94635..2bb51b5c 100644 --- a/docs/reference/set_distinct_by.html +++ b/docs/reference/set_distinct_by.html @@ -1,264 +1,162 @@ - - - - - - - -Set counts to be distinct by some grouping variable. — set_distinct_by • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Skip to contents + +
    -
    - +
    +

    Examples

    +
    #Load in pipe
    +library(magrittr)
    +
    +tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      set_distinct_by(carb)
    +  ) %>%
    +  build()
    +#> # A tibble: 3 × 6
    +#>   row_label1 var1_3        var1_4        var1_5      ord_layer_index ord_layer_1
    +#>   <chr>      <chr>         <chr>         <chr>                 <int>       <dbl>
    +#> 1 4          " 1 (  8.3%)" " 2 ( 33.3%)" " 1 (  8.3…               1           1
    +#> 2 6          " 1 (  8.3%)" " 1 ( 16.7%)" " 1 (  8.3…               1           2
    +#> 3 8          " 3 ( 25.0%)" " 0 (  0.0%)" " 2 ( 16.7…               1           3
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/set_format_strings.html b/docs/reference/set_format_strings.html index e0da7485..652be955 100644 --- a/docs/reference/set_format_strings.html +++ b/docs/reference/set_format_strings.html @@ -1,230 +1,133 @@ - - - - - - - -Set the format strings and associated summaries to be performed in a layer — set_format_strings • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Skip to contents + +
    -
    - - -
    +
    +

    Examples

    +
    # Load in pipe
    +library(magrittr)
    +
    +# In a count layer
    +tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      set_format_strings(f_str('xx (xx%)', n, pct))
    +  ) %>%
    +  build()
    +#> # A tibble: 3 × 6
    +#>   row_label1 var1_3     var1_4     var1_5     ord_layer_index ord_layer_1
    +#>   <chr>      <chr>      <chr>      <chr>                <int>       <dbl>
    +#> 1 4          " 1 ( 7%)" " 8 (67%)" " 2 (40%)"               1           1
    +#> 2 6          " 2 (13%)" " 4 (33%)" " 1 (20%)"               1           2
    +#> 3 8          "12 (80%)" " 0 ( 0%)" " 2 (40%)"               1           3
    +
    +# In a descriptive statistics layer
    +tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_desc(mpg) %>%
    +      set_format_strings(
    +        "n"         = f_str("xx", n),
    +        "Mean (SD)" = f_str("xx.x", mean, empty='NA'),
    +        "SD"        = f_str("xx.xx", sd),
    +        "Median"    = f_str("xx.x", median),
    +        "Q1, Q3"    = f_str("xx, xx", q1, q3, empty=c(.overall='NA')),
    +        "Min, Max"  = f_str("xx, xx", min, max),
    +        "Missing"   = f_str("xx", missing)
    +      )
    +  ) %>%
    +  build()
    +#> # A tibble: 7 × 6
    +#>   row_label1 var1_3   var1_4   var1_5   ord_layer_index ord_layer_1
    +#>   <chr>      <chr>    <chr>    <chr>              <int>       <int>
    +#> 1 n          "15"     "12"     " 5"                   1           1
    +#> 2 Mean (SD)  "16.1"   "24.5"   "21.4"                 1           2
    +#> 3 SD         " 3.37"  " 5.28"  " 6.66"                1           3
    +#> 4 Median     "15.5"   "22.8"   "19.7"                 1           4
    +#> 5 Q1, Q3     "14, 18" "21, 28" "16, 26"               1           5
    +#> 6 Min, Max   "10, 22" "18, 34" "15, 30"               1           6
    +#> 7 Missing    " 0"     " 0"     " 0"                   1           7
    +
    +# In a shift layer
    +tplyr_table(mtcars, am) %>%
    +  add_layer(
    +    group_shift(vars(row=gear, column=carb), by=cyl) %>%
    +    set_format_strings(f_str("xxx (xx.xx%)", n, pct))
    +  ) %>%
    +  build()
    +#> # A tibble: 9 × 17
    +#>   row_label1 row_label2 var1_0_1    var1_0_2 var1_0_3 var1_0_4 var1_0_6 var1_0_8
    +#>   <chr>      <chr>      <chr>       <chr>    <chr>    <chr>    <chr>    <chr>   
    +#> 1 4          3          "  1 (33.3… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 2 4          4          "  0 ( 0.0… "  2 (6… "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 3 4          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 4 6          3          "  2 (50.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 5 6          4          "  0 ( 0.0… "  0 ( … "  0 ( … "  2 (5… "  0 ( … "  0 ( …
    +#> 6 6          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 7 8          3          "  0 ( 0.0… "  4 (3… "  3 (2… "  5 (4… "  0 ( … "  0 ( …
    +#> 8 8          4          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> 9 8          5          "  0 ( 0.0… "  0 ( … "  0 ( … "  0 ( … "  0 ( … "  0 ( …
    +#> # … with 9 more variables: var1_1_1 <chr>, var1_1_2 <chr>, var1_1_3 <chr>,
    +#> #   var1_1_4 <chr>, var1_1_6 <chr>, var1_1_8 <chr>, ord_layer_index <int>,
    +#> #   ord_layer_1 <dbl>, ord_layer_2 <dbl>
    +
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/set_indentation.html b/docs/reference/set_indentation.html index 209e62d1..9827ec13 100644 --- a/docs/reference/set_indentation.html +++ b/docs/reference/set_indentation.html @@ -1,241 +1,133 @@ - - - - - - - -Set the option to prefix the row_labels in the inner count_layer — set_indentation • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    -
    +
    + +
    +
    -
    - +
    +

    Value

    + + +

    The modified count_layer environment

    +
    + +
    -
    - +
    + - - - + diff --git a/docs/reference/set_missing_count.html b/docs/reference/set_missing_count.html index 2a6cf681..5cc2334d 100644 --- a/docs/reference/set_missing_count.html +++ b/docs/reference/set_missing_count.html @@ -1,273 +1,166 @@ - - - - - - - -Set the display for missing strings — set_missing_count • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Set the display for missing strings — set_missing_count • Tplyr - - - - - + + Skip to contents + +
    -
    - +

    The modified layer

    +
    + +
    +

    Examples

    +
    library(magrittr)
    +library(dplyr)
+mtcars2 <- mtcars %>%
+  mutate_all(as.character)
    +mtcars2[mtcars$cyl == 6, "cyl"] <- NA
    +
    +tplyr_table(mtcars2, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      set_missing_count(f_str("xx ", n), Missing = NA)
    +  ) %>%
    +  build()
    +#> # A tibble: 3 × 6
    +#>   row_label1 var1_3        var1_4        var1_5      ord_layer_index ord_layer_1
    +#>   <chr>      <chr>         <chr>         <chr>                 <int>       <dbl>
    +#> 1 4          " 1 (  6.7%)" " 8 ( 66.7%)" " 2 ( 40.0…               1           1
    +#> 2 8          "12 ( 80.0%)" " 0 (  0.0%)" " 2 ( 40.0…               1           2
    +#> 3 Missing    " 2 "         " 4 "         " 1 "                     1           3
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/set_nest_count.html b/docs/reference/set_nest_count.html index f4c24306..2cd58c2c 100644 --- a/docs/reference/set_nest_count.html +++ b/docs/reference/set_nest_count.html @@ -1,193 +1,92 @@ - - - - - - - -Set the option to nest count layers — set_nest_count • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    -
    +
    + +
    +
    -
    - +
    +

    Value

    + + +

    The modified layer

    +
    + +
    -
    - +
    + - - - + diff --git a/docs/reference/set_numeric_where.html b/docs/reference/set_numeric_where.html new file mode 100644 index 00000000..4c3783cc --- /dev/null +++ b/docs/reference/set_numeric_where.html @@ -0,0 +1,277 @@ + + + + + + + + +Set a numeric cutoff — set_numeric_where • Tplyr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    +

In certain tables, it may be necessary to only include rows that meet numeric conditions. Rows that fall below a certain cutoff can be suppressed from the output. This function allows you to pass a cutoff and a cutoff stat (n, distinct_n, pct, or distinct_pct) to suppress values that are less than the cutoff.

    +
    + +
    set_numeric_where(e, numeric_cutoff, stat, column = NULL)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    e

    A count_layer object

    numeric_cutoff

A numeric value; only results greater than or equal to this cutoff will be displayed.

    stat

The statistic to use when filtering out rows. Either 'n', 'distinct_n', or 'pct' is allowable.

    column

If only a particular column should be used to cut off values, it can be supplied here as a character value.

    + +

    Value

    + +

    The modified Tplyr layer object

    + +

    Examples

    +
mtcars %>%
+  tplyr_table(gear) %>%
+  add_layer(
+    group_count(cyl) %>%
+      set_numeric_where(10, "n") %>%
+      add_total_row() %>%
+      set_order_count_method("bycount")
+  )
+
#> *** tplyr_table ***
+#> Target (data.frame):
+#> 	Name:  .
+#> 	Rows:  32
+#> 	Columns:  11 
+#> treat_var variable (quosure)
+#> 	gear
+#> header_n:  header groups
+#> treat_grps groupings (list)
+#> Table Columns (cols):
+#> where: TRUE
+#> Number of layer(s): 1
+#> layer_output: 0
    +
    + +
    + + +
    + + +
    +


    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/set_outer_sort_position.html b/docs/reference/set_outer_sort_position.html index 6d44d9ec..6deca4cc 100644 --- a/docs/reference/set_outer_sort_position.html +++ b/docs/reference/set_outer_sort_position.html @@ -1,237 +1,127 @@ - - - - - - - -Set the value of a outer nested count layer to Inf or -Inf — set_outer_sort_position • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Set the value of a outer nested count layer to Inf or -Inf — set_outer_sort_position • Tplyr - - - - - - - - - - -
    -
    -
    +
    + +
    +
    -
    - +
    +

    Value

    + + +

    The modified count layer.

    +
    + +
    -
    - +
    + - - - + diff --git a/docs/reference/set_precision_data.html b/docs/reference/set_precision_data.html new file mode 100644 index 00000000..63a629fb --- /dev/null +++ b/docs/reference/set_precision_data.html @@ -0,0 +1,166 @@ + +Set precision data — set_precision_data • Tplyr + Skip to contents + + +
    +
    +
    + +
    +

In some cases, there may be organizational standards surrounding decimal precision. For example, there may be a specific standard around the representation of precision relating to lab results. As such, set_precision_data() provides an interface for supplying integer and decimal precision from an external data source.

    +
    + +
    +

    Usage

    +
    set_precision_data(layer, prec, default = c("error", "auto"))
    +
    + +
    +

    Arguments

    +
    layer
    +

    A tplyr_layer object

    + + +
    prec
    +

    A dataframe following the structure specified in the function details

    + + +
    default
    +

    Handling of unspecified by variable groupings. Defaults to 'error'. Set to 'auto' to automatically infer any missing groups.

    + +
    +
    +

    Details

    +

    The ultimate behavior of this feature is just that of the existing auto precision method, except +that the precision is specified in the provided precision dataset rather than inferred from the source data. +At a minimum, the precision dataset must contain the integer variables max_int and max_dec. If by variables +are provided, those variables must be available in the layer by variables.

    +

When the table is built, by default Tplyr will error if the precision dataset is missing by variable groupings that exist in the target dataset. This can be overridden using the default parameter. If default is set to "auto", any missing values will be automatically inferred from the source data.
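A minimal sketch of the default = "auto" fallback, assuming Tplyr and magrittr are loaded; prec_partial is a hypothetical precision dataset that deliberately omits the vs == 1 grouping, so Tplyr infers that group's precision from the source data rather than erroring:

library(magrittr)

# Hypothetical precision dataset that only covers vs == 0
prec_partial <- tibble::tribble(
  ~vs, ~max_int, ~max_dec,
  0,        1,        1
)

tplyr_table(mtcars, gear) %>%
  add_layer(
    group_desc(wt, by = vs) %>%
      set_format_strings(
        'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd)
      ) %>%
      # default = "auto" infers precision for the missing vs == 1 group
      set_precision_data(prec_partial, default = "auto") %>%
      set_precision_on(wt)
  ) %>%
  build()

With default left at "error", the same build would instead stop because the vs == 1 grouping is absent from prec_partial.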

    +
    + +
    +

    Examples

    +
    
    +prec <- tibble::tribble(
    +  ~vs, ~max_int, ~max_dec,
    +  0,        1,        1,
    +  1,        2,        2
    +)
    +
    +tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_desc(wt, by = vs) %>%
    +      set_format_strings(
    +        'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd)
    +      ) %>%
    +      set_precision_data(prec) %>%
    +      set_precision_on(wt)
    +  ) %>%
    +  build()
    +#> Error in set_precision_data(., prec): object 'prec' not found
    +
    +
    +
    +
    + + +
    + + + +
    + + + + + + + diff --git a/docs/reference/set_stats_as_columns.html b/docs/reference/set_stats_as_columns.html new file mode 100644 index 00000000..f24cbc55 --- /dev/null +++ b/docs/reference/set_stats_as_columns.html @@ -0,0 +1,169 @@ + +Set descriptive statistics as columns — set_stats_as_columns • Tplyr + Skip to contents + + +
    +
    +
    + +
    +

In many cases, treatment groups are represented as columns within a table. But some tables call for a transposed presentation, where the treatment groups are displayed by row and the descriptive statistics are represented as columns. set_stats_as_columns() allows Tplyr to output a built table using this transposed format, deviating from the standard representation of treatment groups as columns.

    +
    + +
    +

    Usage

    +
    set_stats_as_columns(e, stats_as_columns = TRUE)
    +
    + +
    +

    Arguments

    +
    e
    +

A desc_layer whose descriptive statistics summaries should be represented as columns

    + + +
    stats_as_columns
    +

    Boolean to set stats as columns

    + +
    +
    +

    Value

    + + +

    The input tplyr_layer

    +
    +
    +

    Details

    +

    This function leaves all specified by variables intact. The only switch that +happens during the build process is that the provided descriptive statistics +are transposed as columns and the treatment variable is left as rows. Column +variables will remain represented as columns, and multiple target variables +will also be respected properly.

    +
    + +
    +

    Examples

    +
    
    +dat <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_desc(wt, by = vs) %>%
    +      set_format_strings(
    +        "n"        = f_str("xx", n),
    +        "sd"       = f_str("xx.x", sd, empty = c(.overall = "BLAH")),
    +        "Median"   = f_str("xx.x", median),
    +        "Q1, Q3"   = f_str("xx, xx", q1, q3),
    +        "Min, Max" = f_str("xx, xx", min, max),
    +        "Missing"  = f_str("xx", missing)
    +      ) %>%
    +      set_stats_as_columns()
    +  ) %>%
    +  build()
    +
    +
    +
    +
    + + +
    + + + +
    + + + + + + + diff --git a/docs/reference/set_total_row_label.html b/docs/reference/set_total_row_label.html index fd03ea91..0a2215f7 100644 --- a/docs/reference/set_total_row_label.html +++ b/docs/reference/set_total_row_label.html @@ -1,256 +1,150 @@ - - - - - - - -Set the label for the total row — set_total_row_label • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Set the label for the total row — set_total_row_label • Tplyr - - - - - + + Skip to contents + +
    -
    - +

    The modified count_layer object

    +
    + +
    +

    Examples

    +
    # Load in pipe
    +library(magrittr)
    +
    +t <- tplyr_table(mtcars, gear) %>%
    +  add_layer(
    +    group_count(cyl) %>%
    +      add_total_row() %>%
    +      set_total_row_label("Total Cyl")
    +  )
    +build(t)
    +#> # A tibble: 4 × 6
    +#>   row_label1 var1_3        var1_4        var1_5      ord_layer_index ord_layer_1
    +#>   <chr>      <chr>         <chr>         <chr>                 <int>       <dbl>
    +#> 1 4          " 1 (  6.7%)" " 8 ( 66.7%)" " 2 ( 40.0…               1           1
    +#> 2 6          " 2 ( 13.3%)" " 4 ( 33.3%)" " 1 ( 20.0…               1           2
    +#> 3 8          "12 ( 80.0%)" " 0 (  0.0%)" " 2 ( 40.0…               1           3
    +#> 4 Total Cyl  "15 (100.0%)" "12 (100.0%)" " 5 (100.0…               1           4
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/str_indent_wrap.html b/docs/reference/str_indent_wrap.html new file mode 100644 index 00000000..abe50c42 --- /dev/null +++ b/docs/reference/str_indent_wrap.html @@ -0,0 +1,182 @@ + +Wrap strings to a specific width with hyphenation while preserving +indentation — str_indent_wrap • Tplyr + Skip to contents + + +
    +
    +
    + +
    +

str_indent_wrap() leverages stringr::str_wrap() under the hood, but takes some extra steps to preserve any indentation that has been applied to a character element, and to use hyphenated wrapping of single words that run longer than the allotted wrapping width.

    +
    + +
    +

    Usage

    +
    str_indent_wrap(x, width = 10, tab_width = 5)
    +
    + +
    +

    Arguments

    +
    x
    +

    An input character vector

    + + +
    width
    +

    The desired width of elements within the output character vector

    + + +
    tab_width
    +

    The number of spaces to which tabs should be converted

    + +
    +
    +

    Value

    + + +

    A character vector with string wrapping applied

    +
    +
    +

    Details

    +

    The function stringr::str_wrap() is highly efficient, but in the +context of table creation there are two select features missing - hyphenation +for long running strings that overflow width, and respect for pre-indentation +of a character element. For example, in an adverse event table, you may have +body system rows as an un-indented column, and preferred terms as indented +columns. These strings may run long and require wrapping to not surpass the +column width. Furthermore, for crowded tables a single word may be longer +than the column width itself.

    +

    This function takes steps to resolve these two issues, while trying to +minimize additional overhead required to apply the wrapping of strings.

    +

Note: This function automatically converts tabs to spaces. Tab width varies depending on font, so width cannot automatically be determined within a data frame. As such, users can specify the number of spaces per tab using the tab_width argument.

    +
    + +
    +

    Examples

    +
    ex_text1 <- c("RENAL AND URINARY DISORDERS", "   NEPHROLITHIASIS")
    +ex_text2 <- c("RENAL AND URINARY DISORDERS", "\tNEPHROLITHIASIS")
    +
    +cat(paste(str_indent_wrap(ex_text1, width=8), collapse="\n\n"),"\n")
    +#> RENAL
    +#> AND
    +#> URINARY
    +#> DISORDE-
    +#> RS
    +#> 
    +#>    NEPHROL-
    +#>    ITHIASI-
    +#>    S 
    +cat(paste(str_indent_wrap(ex_text2, tab_width=4), collapse="\n\n"),"\n")
    +#> RENAL AND
    +#> URINARY
    +#> DISORDERS
    +#> 
    +#>     NEPHROLIT-
    +#>     HIASIS 
    +
    +
    +
    + + +
    + + + +
    + + + + + + + diff --git a/docs/reference/table_format_defaults.html b/docs/reference/table_format_defaults.html index 18f4b83b..45e6bfff 100644 --- a/docs/reference/table_format_defaults.html +++ b/docs/reference/table_format_defaults.html @@ -1,193 +1,92 @@ - - - - - - - -Get or set the default format strings for descriptive statistics layers — get_desc_layer_formats • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Skip to contents + +
    -
    - +
    -
    - +
    + - - - + diff --git a/docs/reference/target_var.html b/docs/reference/target_var.html index 7d89fa58..c3b28ef0 100644 --- a/docs/reference/target_var.html +++ b/docs/reference/target_var.html @@ -1,247 +1,139 @@ - - - - - - - -Set or return treat_var binding — get_target_var • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Set or return treat_var binding — get_target_var • Tplyr - - - - - - - - - - -
    -
    -
    +
    + +
    +
    -
    - - -
    +
    +

    Examples

    +
    # Load in pipe
    +library(magrittr)
    +iris$Species2 <- iris$Species
    +lay <- tplyr_table(iris, Species) %>%
    +  group_count(Species) %>%
    +  set_target_var(Species2)
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/tplyr.html b/docs/reference/tplyr.html index cdc0b6e3..e382666d 100644 --- a/docs/reference/tplyr.html +++ b/docs/reference/tplyr.html @@ -6,7 +6,7 @@ -A grammar of summary data for clinical reports — tplyr • Tplyr +A grammar of summary data for clinical reports — Tplyr • Tplyr @@ -46,8 +46,8 @@ - - + + @@ -67,6 +67,16 @@ + + + + @@ -84,7 +94,7 @@ Tplyr - 0.1.0 + 0.4.4
    @@ -92,7 +102,7 @@
    +

    Author

    + +

    Maintainer: Mike Stackhouse mike.stackhouse@atorusresearch.com (ORCID)

    +

    Authors:

    + +

    Other contributors:

      +
    • Atorus Research LLC [copyright holder]

    • +
    + +

    Examples

    -
    
    +    
# Load in pipe
+library(magrittr)
+
    #> Warning: package ‘magrittr’ was built under R version 4.0.5
+# Use just the defaults
+tplyr_table(mtcars, gear) %>%
+  add_layer(
+    group_desc(mpg, by=cyl)
+  ) %>%
+  add_layer(
+    group_count(carb, by=cyl)
+  ) %>%
+  build()
+
#> # A tibble: 36 × 8
+#> row_label1 row_label2 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1
+#> <chr> <chr> <chr> <chr> <chr> <int> <dbl>
+#> 1 4 n " 1" " 8" " 2" 1 1
+#> 2 4 Mean (SD) "21.50 ( … "26.9… "28.2… 1 1
+#> 3 4 Median "21.50" "25.8… "28.2… 1 1
+#> 4 4 Q1, Q3 "21.50, 21.5… "22.8… "27.1… 1 1
+#> 5 4 Min, Max "21.5, 21.5" "21.4… "26.0… 1 1
+#> 6 4 Missing " 0" " 0" " 0" 1 1
+#> 7 6 n " 2" " 4" " 1" 1 2
+#> 8 6 Mean (SD) "19.75 ( 2.3… "19.7… "19.7… 1 2
+#> 9 6 Median "19.75" "20.1… "19.7… 1 2
+#> 10 6 Q1, Q3 "18.92, 20.5… "18.8… "19.7… 1 2
+#> # … with 26 more rows, and 1 more variable: ord_layer_2 <dbl>
+# Customize and modify
+tplyr_table(mtcars, gear) %>%
+  add_layer(
+    group_desc(mpg, by=cyl) %>%
+      set_format_strings(
+        "n" = f_str("xx", n),
+        "Mean (SD)" = f_str("a.a+1 (a.a+2)", mean, sd, empty='NA'),
+        "Median" = f_str("a.a+1", median),
+        "Q1, Q3" = f_str("a, a", q1, q3, empty=c(.overall='NA')),
+        "Min, Max" = f_str("a, a", min, max),
+        "Missing" = f_str("xx", missing)
+      )
+  ) %>%
+  add_layer(
+    group_count(carb, by=cyl) %>%
+      add_risk_diff(
+        c('5', '3'),
+        c('4', '3')
+      ) %>%
+      set_format_strings(
+        n_counts = f_str('xx (xx%)', n, pct),
+        riskdiff = f_str('xx.xxx (xx.xxx, xx.xxx)', dif, low, high)
+      ) %>%
+      set_order_count_method("bycount") %>%
+      set_ordering_cols('4') %>%
+      set_result_order_var(pct)
+  ) %>%
+  build()
+
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
    #> Warning: Chi-squared approximation may be incorrect
#> # A tibble: 36 × 10
+#> row_label1 row_label2 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1
+#> <chr> <chr> <chr> <chr> <chr> <int> <dbl>
+#> 1 4 n " 1" " 8" " 2" 1 1
+#> 2 4 Mean (SD) "21.50 ( … "26.9… "28.2… 1 1
+#> 3 4 Median "21.50" "25.8… "28.2… 1 1
+#> 4 4 Q1, Q3 "22, 22" "23, … "27, … 1 1
+#> 5 4 Min, Max "22, 22" "21, … "26, … 1 1
+#> 6 4 Missing " 0" " 0" " 0" 1 1
+#> 7 6 n " 2" " 4" " 1" 1 2
+#> 8 6 Mean (SD) "19.75 ( 2.3… "19.7… "19.7… 1 2
+#> 9 6 Median "19.75" "20.1… "19.7… 1 2
+#> 10 6 Q1, Q3 "19, 21" "19, … "20, … 1 2
+#> # … with 26 more rows, and 3 more variables: ord_layer_2 <dbl>,
+#> #   rdiff_5_3 <chr>, rdiff_4_3 <chr>
+# A Shift Table
+tplyr_table(mtcars, am) %>%
+  add_layer(
+    group_shift(vars(row=gear, column=carb), by=cyl) %>%
+      set_format_strings(f_str("xxx (xx.xx%)", n, pct))
+  ) %>%
+  build()
+
#> # A tibble: 9 × 17
+#> row_label1 row_label2 var1_0_1 var1_0_2 var1_0_3 var1_0_4 var1_0_6 var1_0_8
+#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
+#> 1 4 3 " 1 (33.3… " 0 ( … " 0 ( … " 0 ( … " 0 ( … " 0 ( …
+#> 2 4 4 " 0 ( 0.0… " 2 (6… " 0 ( … " 0 ( … " 0 ( … " 0 ( …
+#> 3 4 5 " 0 ( 0.0… " 0 ( … " 0 ( … " 0 ( … " 0 ( … " 0 ( …
+#> 4 6 3 " 2 (50.0… " 0 ( … " 0 ( … " 0 ( … " 0 ( … " 0 ( …
+#> 5 6 4 " 0 ( 0.0… " 0 ( … " 0 ( … " 2 (5… " 0 ( … " 0 ( …
+#> 6 6 5 " 0 ( 0.0… " 0 ( … " 0 ( … " 0 ( … " 0 ( … " 0 ( …
+#> 7 8 3 " 0 ( 0.0… " 4 (3… " 3 (2… " 5 (4… " 0 ( … " 0 ( …
+#> 8 8 4 " 0 ( 0.0… " 0 ( … " 0 ( … " 0 ( … " 0 ( … " 0 ( …
+#> 9 8 5 " 0 ( 0.0… " 0 ( … " 0 ( … " 0 ( … " 0 ( … " 0 ( …
+#> # … with 9 more variables: var1_1_1 <chr>, var1_1_2 <chr>, var1_1_3 <chr>,
+#> #   var1_1_4 <chr>, var1_1_6 <chr>, var1_1_8 <chr>, ord_layer_index <int>,
+#> #   ord_layer_1 <dbl>, ord_layer_2 <dbl>
    +
    -

    Site built with pkgdown 1.5.1.

    +

    Site built with pkgdown 1.6.1.

    diff --git a/docs/reference/tplyr_layer.html b/docs/reference/tplyr_layer.html index 44008c86..e2a791cd 100644 --- a/docs/reference/tplyr_layer.html +++ b/docs/reference/tplyr_layer.html @@ -1,54 +1,6 @@ - - - - - - - -Create a tplyr_layer object — tplyr_layer • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a tplyr_layer object — tplyr_layer • Tplyr - - - - - - - - - - -
    -
    -
    +
    + +
    +
    -
    - - -
    +
    +

    Examples

    +
    tab <- tplyr_table(iris, Sepal.Width)
    +
    +l <- group_count(tab, by=vars('Label Text', Species),
    +                 target_var=Species, where= Sepal.Width < 5.5,
    +                 cols = Species)
    +
    +
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/tplyr_meta.html b/docs/reference/tplyr_meta.html new file mode 100644 index 00000000..188e2c27 --- /dev/null +++ b/docs/reference/tplyr_meta.html @@ -0,0 +1,175 @@ + +Tplyr Metadata Object — tplyr_meta • Tplyr + Skip to contents + + +
    +
    +
    + +
    +

If a Tplyr table is built with the `metadata=TRUE` option specified, then metadata is assembled behind the scenes to provide traceability on each result cell derived. The functions `get_meta_result()` and `get_meta_subset()` allow you to access that metadata by using an ID provided in the row_id column and the column name of the result you'd like to access. The purpose of the row_id variable, rather than a simple row index, is to provide a sort-resistant reference to the originating column, so the output Tplyr table can be sorted in any order but the metadata are still easily accessible.

    +
    + +
    +

    Usage

    +
    tplyr_meta(names = list(), filters = exprs())
    +
    + +
    +

    Arguments

    +
    names
    +

    List of symbols

    + + +
    filters
    +

    List of expressions

    + +
    +
    +

    Value

    + + +

    tplyr_meta object

    +
    +
    +

    Details

    +

The `tplyr_meta` object provides a list with two elements: names and filters. The names element contains every column from the target data.frame of the Tplyr table that factored into the specified result cell, and the filters element contains all the necessary filters to subset the target data to create the specified result cell. `get_meta_subset()` additionally provides a parameter to specify any additional columns you would like to include in the returned subset data frame.
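A minimal sketch of pulling this metadata back out of a built table, assuming Tplyr and magrittr are loaded and mirroring the get_meta_result() example used elsewhere in this reference; the row_id 'd1_1' and column 'var1_4' are assumed to identify an existing result cell, and the add_cols value passed to get_meta_subset() is an assumption based on its signature (mtcars has no USUBJID, so gear is carried through instead):

library(magrittr)

t <- tplyr_table(mtcars, cyl) %>%
  add_layer(
    group_desc(hp)
  )

dat <- build(t, metadata = TRUE)

# The names and filters behind the cell identified by row_id 'd1_1',
# column 'var1_4'
get_meta_result(t, 'd1_1', 'var1_4')

# The matching subset of the target data; add_cols is assumed here to
# carry the gear column through to the returned data frame
get_meta_subset(t, 'd1_1', 'var1_4', add_cols = dplyr::vars(gear))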

    +
    + +
    +

    Examples

    +
    
    +tplyr_meta(
    +   names = rlang::quos(x, y, z),
    +   filters = rlang::quos(x == 1, y==2, z==3)
    + )
    +#> tplyr_meta: 3 names, 3 filters
    +#> Names:
    +#>     x, y, z 
    +#> Filters:
    +#>     x == 1, y == 2, z == 3 
    +
    +
    +
    +
    + + +
    + + + +
    + + + + + + + diff --git a/docs/reference/tplyr_table.html b/docs/reference/tplyr_table.html index bec139bd..1d06af81 100644 --- a/docs/reference/tplyr_table.html +++ b/docs/reference/tplyr_table.html @@ -1,227 +1,126 @@ - - - - - - - -Create a Tplyr table object — tplyr_table • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Skip to contents + +
    -
    - +
    +

    Examples

    +
    
    +tab <- tplyr_table(iris, Species, where = Sepal.Length < 5.8)
    +
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/treat_grps.html b/docs/reference/treat_grps.html index 59b6fd5a..958bfef4 100644 --- a/docs/reference/treat_grps.html +++ b/docs/reference/treat_grps.html @@ -1,193 +1,92 @@ - - - - - - - -Combine existing treatment groups for summary — add_treat_grps • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Combine existing treatment groups for summary — add_treat_grps • Tplyr - - - - - + + Skip to contents + +
    -
    - - -
    +
    +

    Examples

    +
    tab <- tplyr_table(iris, Species)
    +
    +# A custom group
    +add_treat_grps(tab, "Not Setosa" = c("versicolor", "virginica"))
    +#> *** tplyr_table ***
    +#> Target (data.frame):
    +#> 	Name:  iris
    +#> 	Rows:  150
    +#> 	Columns:  5 
    +#> treat_var variable (quosure)
    +#> 	Species
    +#> header_n:  header groups
    +#> treat_grps groupings (list)
    +#> 	Not Setosa
    +#> Table Columns (cols):
    +#> where: TRUE
    +#> Number of layer(s): 0
    +#> layer_output: 0
    +
    +# Add a total group
    +add_total_group(tab)
    +#> *** tplyr_table ***
    +#> Target (data.frame):
    +#> 	Name:  iris
    +#> 	Rows:  150
    +#> 	Columns:  5 
    +#> treat_var variable (quosure)
    +#> 	Species
    +#> header_n:  header groups
    +#> treat_grps groupings (list)
    +#> 	Not Setosa
    +#> 	Total
    +#> Table Columns (cols):
    +#> where: TRUE
    +#> Number of layer(s): 0
    +#> layer_output: 0
    +
    +treat_grps(tab)
    +#> $`Not Setosa`
    +#> [1] "versicolor" "virginica" 
    +#> 
    +#> $Total
    +#> [1] "setosa"     "versicolor" "virginica" 
    +#> 
    +# Returns:
    +# $`Not Setosa`
    +#[1] "versicolor" "virginica"
    +#
    +#$Total
    +#[1] "setosa"     "versicolor" "virginica"
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/treat_var.html b/docs/reference/treat_var.html index 6efe2375..196359b9 100644 --- a/docs/reference/treat_var.html +++ b/docs/reference/treat_var.html @@ -1,260 +1,155 @@ - - - - - - - -Return or set the treatment variable binding — treat_var • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Return or set the treatment variable binding — treat_var • Tplyr - - - - - + + Skip to contents + +
    -
    - + +
    +

    Examples

    +
    tab <- tplyr_table(mtcars, cyl)
    +
    +set_treat_var(tab, gear)
    +#> *** tplyr_table ***
    +#> Target (data.frame):
    +#> 	Name:  mtcars
    +#> 	Rows:  32
    +#> 	Columns:  11 
    +#> treat_var variable (quosure)
+#> 	gear
+#> pop_treat_var variable (quosure)
    +#> 	cyl
    +#> 
    +#> header_n:  header groups
    +#> treat_grps groupings (list)
    +#> Table Columns (cols):
    +#> where: TRUE
    +#> Number of layer(s): 0
    +#> layer_output: 0
    +
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/reference/where.html b/docs/reference/where.html index dbea1d8e..5ce19458 100644 --- a/docs/reference/where.html +++ b/docs/reference/where.html @@ -1,268 +1,160 @@ - - - - - - - -Set or return where binding for layer or table — get_where.tplyr_layer • Tplyr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Set or return where binding for layer or table — get_where.tplyr_layer • Tplyr - - - - - + + Skip to contents + +
    -
    - - -
    +
    +

    Examples

    +
    # Load in pipe
    +library(magrittr)
    +
    +iris$Species2 <- iris$Species
    +lay <- tplyr_table(iris, Species) %>%
    +  group_count(Species) %>%
    +  set_where(Petal.Length > 3) %>%
    +  # Set logic for pop_data as well
    +  set_pop_where(Petal.Length > 3)
    +
    +
    +
    +
    -
    - +
    + - - - + diff --git a/docs/sitemap.xml b/docs/sitemap.xml new file mode 100644 index 00000000..58b86646 --- /dev/null +++ b/docs/sitemap.xml @@ -0,0 +1,285 @@ + + + + https://atorus-research.github.io/Tplyr/404.html + + + https://atorus-research.github.io/Tplyr/CONTRIBUTING.html + + + https://atorus-research.github.io/Tplyr/ISSUE_TEMPLATE.html + + + https://atorus-research.github.io/Tplyr/LICENSE-text.html + + + https://atorus-research.github.io/Tplyr/LICENSE.html + + + https://atorus-research.github.io/Tplyr/PULL_REQUEST_TEMPLATE.html + + + https://atorus-research.github.io/Tplyr/articles/Advanced_Tables_with_tplyr.html + + + https://atorus-research.github.io/Tplyr/articles/Tplyr.html + + + https://atorus-research.github.io/Tplyr/articles/alpha-release.html + + + https://atorus-research.github.io/Tplyr/articles/beta-release.html + + + https://atorus-research.github.io/Tplyr/articles/count.html + + + https://atorus-research.github.io/Tplyr/articles/count_shift.html + + + https://atorus-research.github.io/Tplyr/articles/custom-metadata.html + + + https://atorus-research.github.io/Tplyr/articles/denom.html + + + https://atorus-research.github.io/Tplyr/articles/desc-vignette.html + + + https://atorus-research.github.io/Tplyr/articles/desc.html + + + https://atorus-research.github.io/Tplyr/articles/getting-started.html + + + https://atorus-research.github.io/Tplyr/articles/index.html + + + https://atorus-research.github.io/Tplyr/articles/layer_templates.html + + + https://atorus-research.github.io/Tplyr/articles/metadata.html + + + https://atorus-research.github.io/Tplyr/articles/options-vignette.html + + + https://atorus-research.github.io/Tplyr/articles/options.html + + + https://atorus-research.github.io/Tplyr/articles/readme.html + + + https://atorus-research.github.io/Tplyr/articles/riskdiff.html + + + https://atorus-research.github.io/Tplyr/articles/shift.html + + + https://atorus-research.github.io/Tplyr/articles/sort.html + + + https://atorus-research.github.io/Tplyr/articles/styled-table.html + + + https://atorus-research.github.io/Tplyr/articles/table.html + + + https://atorus-research.github.io/Tplyr/articles/test.html + + + https://atorus-research.github.io/Tplyr/articles/tplyr.html + + + https://atorus-research.github.io/Tplyr/articles/tplyr_internals.html + + + https://atorus-research.github.io/Tplyr/authors.html + + + https://atorus-research.github.io/Tplyr/index.html + + + https://atorus-research.github.io/Tplyr/news/index.html + + + https://atorus-research.github.io/Tplyr/reference/Tplyr.html + + + https://atorus-research.github.io/Tplyr/reference/add_column_headers.html + + + https://atorus-research.github.io/Tplyr/reference/add_risk_diff.html + + + https://atorus-research.github.io/Tplyr/reference/add_total_row.html + + + https://atorus-research.github.io/Tplyr/reference/append_metadata.html + + + https://atorus-research.github.io/Tplyr/reference/apply_formats.html + + + https://atorus-research.github.io/Tplyr/reference/apply_row_masks.html + + + https://atorus-research.github.io/Tplyr/reference/build.html + + + https://atorus-research.github.io/Tplyr/reference/by.html + + + https://atorus-research.github.io/Tplyr/reference/f_str.html + + + https://atorus-research.github.io/Tplyr/reference/get_meta_result.html + + + https://atorus-research.github.io/Tplyr/reference/get_meta_subset.html + + + https://atorus-research.github.io/Tplyr/reference/get_metadata.html + + + https://atorus-research.github.io/Tplyr/reference/get_numeric_data.html + + + 
https://atorus-research.github.io/Tplyr/reference/get_stats_data.html + + + https://atorus-research.github.io/Tplyr/reference/header_n.html + + + https://atorus-research.github.io/Tplyr/reference/index.html + + + https://atorus-research.github.io/Tplyr/reference/keep_levels.html + + + https://atorus-research.github.io/Tplyr/reference/layer_attachment.html + + + https://atorus-research.github.io/Tplyr/reference/layer_constructors.html + + + https://atorus-research.github.io/Tplyr/reference/layer_templates.html + + + https://atorus-research.github.io/Tplyr/reference/meta_extraction.html + + + https://atorus-research.github.io/Tplyr/reference/metadata_additions.html + + + https://atorus-research.github.io/Tplyr/reference/new_tplyr_meta.html + + + https://atorus-research.github.io/Tplyr/reference/ordering.html + + + https://atorus-research.github.io/Tplyr/reference/pipe.html + + + https://atorus-research.github.io/Tplyr/reference/pop_data.html + + + https://atorus-research.github.io/Tplyr/reference/pop_treat_var.html + + + https://atorus-research.github.io/Tplyr/reference/precision_by.html + + + https://atorus-research.github.io/Tplyr/reference/precision_on.html + + + https://atorus-research.github.io/Tplyr/reference/process_formatting.html + + + https://atorus-research.github.io/Tplyr/reference/process_metadata.html + + + https://atorus-research.github.io/Tplyr/reference/process_statistic_data.html + + + https://atorus-research.github.io/Tplyr/reference/process_statistic_formatting.html + + + https://atorus-research.github.io/Tplyr/reference/process_summaries.html + + + https://atorus-research.github.io/Tplyr/reference/set_custom_summaries.html + + + https://atorus-research.github.io/Tplyr/reference/set_denom_ignore.html + + + https://atorus-research.github.io/Tplyr/reference/set_denom_where.html + + + https://atorus-research.github.io/Tplyr/reference/set_denoms_by.html + + + https://atorus-research.github.io/Tplyr/reference/set_distinct_by.html + + + https://atorus-research.github.io/Tplyr/reference/set_format_strings.html + + + https://atorus-research.github.io/Tplyr/reference/set_indentation.html + + + https://atorus-research.github.io/Tplyr/reference/set_missing_count.html + + + https://atorus-research.github.io/Tplyr/reference/set_nest_count.html + + + https://atorus-research.github.io/Tplyr/reference/set_numeric_threshold.html + + + https://atorus-research.github.io/Tplyr/reference/set_numeric_where.html + + + https://atorus-research.github.io/Tplyr/reference/set_outer_sort_position.html + + + https://atorus-research.github.io/Tplyr/reference/set_precision_data.html + + + https://atorus-research.github.io/Tplyr/reference/set_stats_as_columns.html + + + https://atorus-research.github.io/Tplyr/reference/set_total_row_label.html + + + https://atorus-research.github.io/Tplyr/reference/str_indent_wrap.html + + + https://atorus-research.github.io/Tplyr/reference/table_format_defaults.html + + + https://atorus-research.github.io/Tplyr/reference/target_var.html + + + https://atorus-research.github.io/Tplyr/reference/tplyr.html + + + https://atorus-research.github.io/Tplyr/reference/tplyr_layer.html + + + https://atorus-research.github.io/Tplyr/reference/tplyr_meta.html + + + https://atorus-research.github.io/Tplyr/reference/tplyr_table.html + + + https://atorus-research.github.io/Tplyr/reference/treat_grps.html + + + https://atorus-research.github.io/Tplyr/reference/treat_var.html + + + https://atorus-research.github.io/Tplyr/reference/where.html + + diff --git a/man/append_metadata.Rd 
b/man/append_metadata.Rd new file mode 100644 index 00000000..0bce5353 --- /dev/null +++ b/man/append_metadata.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta.R +\name{append_metadata} +\alias{append_metadata} +\title{Append the Tplyr table metadata dataframe} +\usage{ +append_metadata(t, meta) +} +\arguments{ +\item{t}{A tplyr_table object} + +\item{meta}{A dataframe fitting the specifications of the details section of +this function} +} +\value{ +A tplyr_table object +} +\description{ +\code{append_metadata()} allows a user to extend the Tplyr metadata data frame +with user provided data. In some tables, Tplyr may be able to provided most +of the data, but a user may have to extend the table with other summaries, +statistics, etc. This function allows the user to extend the tplyr_table's +metadata with their own metadata content using custom data frames created +using the \code{tplyr_meta} object. +} +\details{ +As this is an advanced feature of Tplyr, ownership is on the user to make +sure the metadata data frame is assembled properly. The only restrictions +applied by \code{append_metadata()} are that \code{meta} must have a column named +\code{row_id}, and the values in \code{row_id} cannot be duplicates of any \code{row_id} +value already present in the Tplyr metadata dataframe. \code{tplyr_meta()} objects +align with constructed dataframes using the \code{row_id} and output dataset +column name. As such, \code{tplyr_meta()} objects should be inserted into a data +frame using a list column. +} +\examples{ +t <- tplyr_table(mtcars, gear) \%>\% + add_layer( + group_desc(wt) + ) + +t \%>\% + build(metadata=TRUE) + +m <- tibble::tibble( + row_id = c('x1_1'), + var1_3 = list(tplyr_meta(rlang::quos(a, b, c), rlang::quos(a==1, b==2, c==3))) +) + +append_metadata(t, m) +} diff --git a/man/apply_formats.Rd b/man/apply_formats.Rd new file mode 100644 index 00000000..7564bfa6 --- /dev/null +++ b/man/apply_formats.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/apply_formats.R +\name{apply_formats} +\alias{apply_formats} +\title{Apply Format Strings outside of a Tplyr table} +\usage{ +apply_formats(format_string, ..., empty = c(.overall = "")) +} +\arguments{ +\item{format_string}{The desired display format. X's indicate digits. On the +left, the number of x's indicates the integer length. On the right, the +number of x's controls decimal precision and rounding. Variables are +inferred by any separation of the 'x' values other than a decimal.} + +\item{...}{The variables to be formatted using the format specified in +\code{format_string}. These must be numeric variables.} + +\item{empty}{The string to display when the numeric data is not available. +Use a single element character vector, with the element named '.overall' to +instead replace the whole string.} +} +\value{ +Character vector of formatted values +} +\description{ +The \code{f_str} object in Tplyr is used to drive formatting of the outputs +strings within a Tplyr table. This function allows a user to use the same +interface to apply formatted string on any data frame within a +\code{dplyr::mutate()} context. 
+} +\details{ +Note that auto-precision is not currently supported within \code{apply_formats()} +} +\examples{ + +library(dplyr) + +mtcars \%>\% + head() \%>\% + mutate( + fmt_example = apply_formats('xxx (xx.x)', hp, wt) + ) +} diff --git a/man/build.Rd b/man/build.Rd index 03fe50e4..0cdff8b3 100644 --- a/man/build.Rd +++ b/man/build.Rd @@ -4,28 +4,42 @@ \alias{build} \title{Trigger the execution of the \code{tplyr_table}} \usage{ -build(x) +build(x, metadata = FALSE) } \arguments{ \item{x}{A \code{tplyr_table} object} + +\item{metadata}{Trigger to build metadata. Defaults to FALSE} } \value{ An executed \code{tplyr_table} } \description{ -The functions used to assemble a \code{tplyr_table} object and each of the layers do not trigger the processing of any data. Rather, a lazy -execution style is used to allow you to contruct your table and then explicitly state when the data processing should happen. -\code{build} triggers this event. +The functions used to assemble a \code{tplyr_table} object and +each of the layers do not trigger the processing of any data. Rather, a lazy +execution style is used to allow you to construct your table and then +explicitly state when the data processing should happen. \code{build} +triggers this event. } \details{ -When the \code{build} command is executed, all of the data processing commences. Any preprocessing necessary within the table environment -takes place first. Next, each of the layers begins executing. Once the layers complete executing, the output of each layer is stacked -into the resulting data frame. +When the \code{build} command is executed, all of the data +processing commences. Any pre-processing necessary within the table +environment takes place first. Next, each of the layers begins executing. +Once the layers complete executing, the output of each layer is stacked into +the resulting data frame. + +Once this process is complete, any post-processing necessary within the table +environment takes place, and the final output can be delivered. Metadata and +traceability information are kept within each of the layer environments, +which allows an investigation into the source of the resulting datapoints. +For example, numeric data from any summaries performed is maintained and +accessible within a layer using \code{\link{get_numeric_data}}. -Once this process is complete, any post-processing necessary within the table environment takes place, and the final output can be -delivered. Metadata and traceability information are kept within each of the layer environments, which allows an investigation into the -source of the resulting datapoints. For example, numeric data from any summaries performed is maintained and accessible within -a layer using \code{\link{get_numeric_data}}. +The `metadata` option of build will trigger the construction of traceability +metadata for the constructed data frame. Essentially, for every "result" that +Tplyr produces, Tplyr can also generate the steps necessary to obtain the +source data which produced that result from the input. For more information, +see vignette("metadata"). } \examples{ # Load in Pipe diff --git a/man/f_str.Rd b/man/f_str.Rd index a99f3259..3b599384 100644 --- a/man/f_str.Rd +++ b/man/f_str.Rd @@ -26,63 +26,102 @@ A \code{f_str} object } \description{ \code{f_str} objects are intended to be used within the function -\code{set_format_strings}. The \code{f_str} object carries information that -powers a significant amount of layer processing. 
The \code{format_string} -parameter is capable of controlling the display of a data point and decimal -precision. The variables provided in \code{...} control which data points are -used to populate the string formatted output. +\code{set_format_strings}. The \code{f_str} object carries information that powers a +significant amount of layer processing. The \code{format_string} parameter is +capable of controlling the display of a data point and decimal precision. The +variables provided in \code{...} control which data points are used to populate +the string formatted output. } \details{ Format strings are one of the most powerful components of 'Tplyr'. - Traditionally, converting numeric values into strings for presentation can - consume a good deal of time. Values and decimals need to align between - rows, rounding before trimming is sometimes forgotten - it can become a - tedious mess that is realistically not an important part of the analysis - being performed. 'Tplyr' makes this process as simple as we can, while - still allowing flexibility to the user. +Traditionally, converting numeric values into strings for presentation can +consume a good deal of time. Values and decimals need to align between +rows, rounding before trimming is sometimes forgotten - it can become a +tedious mess that is realistically not an important part of the analysis +being performed. 'Tplyr' makes this process as simple as we can, while +still allowing flexibility to the user. - Tplyr provides both manual and automatic decimal precision formatting. The - display of the numbers in the resulting data frame is controlled by the - \code{format_string} parameter. For manual precision, just like dummy - values may be presented on your mocks, integer and decimal precision is - specified by the user providing a string of 'x's for how you'd like your - numbers formatted. If you'd like 2 integers with 3 decimal places, you - specify your string as 'xx.xxx'. 'Tplyr' does the work to get the numbers - in the right place. +Tplyr provides both manual and automatic decimal precision formatting. The +display of the numbers in the resulting data frame is controlled by the +\code{format_string} parameter. For manual precision, just like dummy values may +be presented on your mocks, integer and decimal precision is specified by +the user providing a string of 'x's for how you'd like your numbers +formatted. If you'd like 2 integers with 3 decimal places, you specify your +string as 'xx.xxx'. 'Tplyr' does the work to get the numbers in the right +place. - To take this a step further, automatic decimal precision can also be - obtained based on the collected precision within the data. When creating - tables where results vary by some parameter, different results may call for - different degrees of precision. To use automatic precision, use a single - 'a' on either the integer and decimal side. If you'd like to use increased - precision (i.e. you'd like mean to be collected precision +1), use 'a+1'. - So if you'd like both integer and and decimal precision to be based on the - data as collected, you can use a format like 'a.a' - or for collected+1 - decimal precision, 'a.a+1'. You can mix and match this with manual formats - as well, making format strings such as 'xx.a+1'. +To take this a step further, automatic decimal precision can also be +obtained based on the collected precision within the data. When creating +tables where results vary by some parameter, different results may call for +different degrees of precision. 
To use automatic precision, use a single +'a' on either the integer and decimal side. If you'd like to use increased +precision (i.e. you'd like mean to be collected precision +1), use 'a+1'. +So if you'd like both integer and decimal precision to be based on the +data as collected, you can use a format like 'a.a' - or for collected+1 +decimal precision, 'a.a+1'. You can mix and match this with manual formats +as well, making format strings such as 'xx.a+1'. - If you want two numbers on the same line, you provide two sets of x's. For - example, if you're presenting a value like "mean (sd)" - you could provide - the string 'xx.xx (xx.xxx)', or perhaps 'a.a+1 (a.a+2). Note that you're - able to provide different integer lengths and different decimal precision - for the two values. Each format string is independent and relates only to - the format specified. +If you want two numbers on the same line, you provide two sets of x's. For +example, if you're presenting a value like "mean (sd)" - you could provide +the string 'xx.xx (xx.xxx)', or perhaps 'a.a+1 (a.a+2)'. Note that you're +able to provide different integer lengths and different decimal precision +for the two values. Each format string is independent and relates only to +the format specified. - The other parameters of the \code{f_str} call specify what values should - fill the x's. \code{f_str} objects are used slightly differently between - different layers. When declaring a format string within a count layer, - \code{f_str} expects to see the values \code{n} and (if desired) - \code{pct}, which specifies the formatting for your n's and percent values. - But in descriptive statistic layers, \code{f_str} parameters refer to the - names of the summaries being performed, either by built in defaults, or - custom summaries declared using \code{\link{set_custom_summaries}}. See - \code{\link{set_format_strings}} for some more notes about layers specific - implementation. +The other parameters of the \code{f_str} call specify what values should fill +the x's. \code{f_str} objects are used slightly differently between different +layers. When declaring a format string within a count layer, \code{f_str()} +expects to see the values \code{n} or \code{distinct_n} for event or distinct counts, +\code{pct} or \code{distinct_pct} for event or distinct percentages, or \code{total} or +\code{distinct_total} for denominator calculations. But in descriptive statistic +layers, \code{f_str} parameters refer to the names of the summaries being +performed, either by built in defaults, or custom summaries declared using +\code{\link[=set_custom_summaries]{set_custom_summaries()}}. See \code{\link[=set_format_strings]{set_format_strings()}} for some more notes +about layer-specific implementation. - Count and shift layers frequencies and percentages can be specified with - 'n' and 'pct' respectively. Distinct values can also be presented in count - layers with the arguments 'distinct' and 'distinct_total'. +An \code{f_str()} may also be used outside of a Tplyr table. The function +\code{\link[=apply_formats]{apply_formats()}} allows you to apply an \code{f_str} within the context of +\code{\link[dplyr:mutate]{dplyr::mutate()}} or more generally a vectorized function. } +\section{Valid \code{f_str()} Variables by Layer Type}{ + + +Valid variables allowed within the \code{...} parameter of \code{f_str()} differ by +layer type.
+\itemize{ +\item Count layers +\itemize{ +\item \code{n} +\item \code{pct} +\item \code{total} +\item \code{distinct_n} +\item \code{distinct_pct} +\item \code{distinct_total} +} +\item Shift layers +\itemize{ +\item \code{n} +\item \code{pct} +\item \code{total} +} +\item Desc layers +\itemize{ +\item \code{n} +\item \code{mean} +\item \code{sd} +\item \code{median} +\item \code{variance} +\item \code{min} +\item \code{max} +\item \code{iqr} +\item \code{q1} +\item \code{q3} +\item \code{missing} +\item Custom summaries created by \code{\link[=set_custom_summaries]{set_custom_summaries()}} +} +} +} + \examples{ f_str("xx.x (xx.x)", mean, sd) diff --git a/man/get_meta_result.Rd b/man/get_meta_result.Rd new file mode 100644 index 00000000..572324d8 --- /dev/null +++ b/man/get_meta_result.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta_utils.R +\name{get_meta_result} +\alias{get_meta_result} +\title{Extract the result metadata of a Tplyr table} +\usage{ +get_meta_result(x, row_id, column, ...) +} +\arguments{ +\item{x}{A built Tplyr table or a dataframe} + +\item{row_id}{The row_id value of the desired cell, provided as a character +string} + +\item{column}{The result column of interest, provided as a character string} + +\item{...}{additional arguments} +} +\value{ +A tplyr_meta object +} +\description{ +Given a row_id value and a result column, this function will return the +tplyr_meta object associated with that 'cell'. +} +\details{ +If a Tplyr table is built with the \code{metadata=TRUE} option specified, then +metadata is assembled behind the scenes to provide traceability on each +result cell derived. The functions \code{get_meta_result()} and +\code{get_meta_subset()} allow you to access that metadata by using an ID provided +in the row_id column and the column name of the result you'd like to access. +The purpose of the row_id variable instead of a simple row index is to +provide a sort resistant reference of the originating column, so the output +Tplyr table can be sorted in any order but the metadata are still easily +accessible. + +The \code{tplyr_meta} object provides a list with two elements - names and +filters. The names contain every column from the target data.frame of the +Tplyr table that factored into the specified result cell, and the filters +contain all the necessary filters to subset the data summarized to create the +specified result cell. \code{get_meta_subset()} additionally provides a parameter to +specify any additional columns you would like to include in the returned +subset data frame. +} +\examples{ +t <- tplyr_table(mtcars, cyl) \%>\% + add_layer( + group_desc(hp) + ) + +dat <- t \%>\% build(metadata = TRUE) + +get_meta_result(t, 'd1_1', 'var1_4') + +m <- t$metadata +dat <- t$target + +get_meta_result(m, 'd1_1', 'var1_4') +} diff --git a/man/get_meta_subset.Rd b/man/get_meta_subset.Rd new file mode 100644 index 00000000..f8028394 --- /dev/null +++ b/man/get_meta_subset.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta_utils.R +\name{get_meta_subset} +\alias{get_meta_subset} +\alias{get_meta_subset.data.frame} +\alias{get_meta_subset.tplyr_table} +\title{Extract the subset of data based on result metadata} +\usage{ +get_meta_subset(x, row_id, column, add_cols = vars(USUBJID), ...) + +\method{get_meta_subset}{data.frame}( + x, + row_id, + column, + add_cols = vars(USUBJID), + target = NULL, + ...
+) + +\method{get_meta_subset}{tplyr_table}(x, row_id, column, add_cols = vars(USUBJID), ...) +} +\arguments{ +\item{x}{A built Tplyr table or a dataframe} + +\item{row_id}{The row_id value of the desired cell, provided as a character +string} + +\item{column}{The result column of interest, provided as a character string} + +\item{add_cols}{Additional columns to include in subset data.frame output} + +\item{...}{additional arguments} + +\item{target}{A data frame to be subset (if not pulled from a Tplyr table)} +} +\value{ +A data.frame +} +\description{ +Given a row_id value and a result column, this function will return the +subset of data referenced by the tplyr_meta object associated with that +'cell', which provides traceability to tie a result to its source. +} +\details{ +If a Tplyr table is built with the \code{metadata=TRUE} option specified, then +metadata is assembled behind the scenes to provide traceability on each +result cell derived. The functions \code{get_meta_result()} and +\code{get_meta_subset()} allow you to access that metadata by using an ID provided +in the row_id column and the column name of the result you'd like to access. +The purpose of the row_id variable instead of a simple row index is to +provide a sort resistant reference of the originating column, so the output +Tplyr table can be sorted in any order but the metadata are still easily +accessible. + +The \code{tplyr_meta} object provides a list with two elements - names and +filters. The names contain every column from the target data.frame of the +Tplyr table that factored into the specified result cell, and the filters +contain all the necessary filters to subset the data summarized to create the +specified result cell. \code{get_meta_subset()} additionally provides a parameter +to specify any additional columns you would like to include in the returned +subset data frame. +} +\examples{ +t <- tplyr_table(mtcars, cyl) \%>\% + add_layer( + group_desc(hp) + ) + + +dat <- t \%>\% build(metadata = TRUE) + +get_meta_subset(t, 'd1_1', 'var1_4', add_cols = dplyr::vars(carb)) + +m <- t$metadata +dat <- t$target + +get_meta_subset(m, 'd1_1', 'var1_4', add_cols = dplyr::vars(carb), target = dat) +} diff --git a/man/get_metadata.Rd b/man/get_metadata.Rd new file mode 100644 index 00000000..01909745 --- /dev/null +++ b/man/get_metadata.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta.R +\name{get_metadata} +\alias{get_metadata} +\title{Get the metadata dataframe from a tplyr_table} +\usage{ +get_metadata(t) +} +\arguments{ +\item{t}{A Tplyr table with metadata built} +} +\value{ +Tplyr metadata dataframe +} +\description{ +Pull out the metadata dataframe from a tplyr_table to work with it directly +} +\examples{ +t <- tplyr_table(mtcars, gear) \%>\% + add_layer( + group_desc(wt) + ) + +t \%>\% + build(metadata=TRUE) + +get_metadata(t) +} diff --git a/man/layer_attachment.Rd b/man/layer_attachment.Rd index e0d9bfc1..a6a00f5c 100644 --- a/man/layer_attachment.Rd +++ b/man/layer_attachment.Rd @@ -21,8 +21,6 @@ add_layers(parent, ...) \value{ A \code{tplyr_table} or \code{tplyr_layer}/\code{tplyr_subgroup_layer} with a new layer inserted into the \code{layer} binding - - } \description{ \code{add_layer} attaches a \code{tplyr_layer} to a \code{tplyr_table} object.
This allows diff --git a/man/layer_templates.Rd b/man/layer_templates.Rd new file mode 100644 index 00000000..329003dc --- /dev/null +++ b/man/layer_templates.Rd @@ -0,0 +1,119 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/layer_templates.R +\name{new_layer_template} +\alias{new_layer_template} +\alias{remove_layer_template} +\alias{get_layer_template} +\alias{get_layer_templates} +\alias{use_template} +\title{Create, view, extract, remove, and use Tplyr layer templates} +\usage{ +new_layer_template(name, template) + +remove_layer_template(name) + +get_layer_template(name) + +get_layer_templates() + +use_template(name, ..., add_params = NULL) +} +\arguments{ +\item{name}{Template name} + +\item{template}{Template layer syntax, starting with a layer constructor +\code{group_count|desc|shift}. The constructor should be called with an ellipsis +argument (i.e. group_count(...)).} + +\item{...}{Arguments passed directly into a layer constructor, matching the +target, by, and where parameters.} + +\item{add_params}{Additional parameters passed into layer modifier functions. +These arguments are specified in a template within curly brackets such as +{param}. Supply as a named list, where the element name is the parameter.} +} +\description{ +There are several scenarios where a layer template may be useful. Some +tables, like demographics tables, may have many layers that will all +essentially look the same. Categorical variables will have the same count +layer settings, and continuous variables will have the same desc layer +settings. A template allows a user to build those settings once per layer, +then reference the template when the Tplyr table is actually built. +} +\details{ +This suite of functions allows a user to create and use layer templates. +Layer templates allow a user to pre-build and reuse an entire layer +configuration, from the layer constructor down to all modifying functions. +Furthermore, users can specify parameters they may want to be +interchangeable. Additionally, layer templates are extensible, so a template +can be used and then further extended with additional layer modifying +functions. + +Templates are created using \code{new_layer_template()}. To use a template, use the +function \code{use_template()} in place of \code{group_count|desc|shift()}. If you want +to view a specific template, use \code{get_layer_template()}. If you want to view +all templates, use \code{get_layer_templates()}. And to remove a layer template, use +\code{remove_layer_template()}. Layer templates themselves are stored in the +option \code{tplyr.layer_templates}, but a user should not access this directly +and instead use the Tplyr supplied functions. + +When providing the template layer syntax, the layer must start with a layer +constructor. These are one of the functions \code{group_count()}, \code{group_desc()}, +or \code{group_shift()}. Instead of passing arguments into these functions, +templates are specified using an ellipsis in the constructor, i.e. +\code{group_count(...)}. This is required, as after the template is built a user +supplies these arguments via \code{use_template()}. + +\code{use_template()} takes the \code{group_count|desc|shift()} arguments by default. +If a user specified additional arguments in the template, these are provided +in a list through the argument \code{add_params}. Provide these arguments exactly +as you would in a normal layer. When creating the template, these parameters +can be specified by using curly brackets.
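As a rough sketch of how a parameterized template might look in practice (the template name "ae_counts" and the adae/TRTA/AEBODSYS/AEDECOD data and variables are illustrative stand-ins borrowed from this package's test data, not a required setup):

library(Tplyr)

# Create a reusable count layer template with one interchangeable
# parameter, {sort_meth}, to be supplied later through add_params
new_layer_template(
  "ae_counts",
  group_count(...) %>%
    set_format_strings(f_str("xx (xx.x%)", n, pct)) %>%
    set_order_count_method({sort_meth})
)

# The constructor arguments (target, by, where) go directly into
# use_template(); {sort_meth} is filled in through add_params
tplyr_table(adae, TRTA) %>%
  add_layer(
    use_template("ae_counts", vars(AEBODSYS, AEDECOD),
                 add_params = list(sort_meth = "bycount"))
  ) %>%
  build()
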
See the examples for details. +} +\examples{ + +op <- options() + +new_layer_template( + "example_template", + group_count(...) \%>\% + set_format_strings(f_str('xx (xx\%)', n, pct)) +) + +get_layer_templates() + +get_layer_template("example_template") + +tplyr_table(mtcars, vs) \%>\% + add_layer( + use_template("example_template", gear) + ) \%>\% + build() + +remove_layer_template("example_template") + +new_layer_template( + "example_template", + group_count(...) \%>\% + set_format_strings(f_str('xx (xx\%)', n, pct)) \%>\% + set_order_count_method({sort_meth}) \%>\% + set_ordering_cols({sort_cols}) +) + +get_layer_template("example_template") + +tplyr_table(mtcars, vs) \%>\% + add_layer( + use_template("example_template", gear, add_params = + list( + sort_meth = "bycount", + sort_cols = `1` + )) + ) \%>\% + build() + +remove_layer_template("example_template") + +options(op) +} +\concept{Layer Templates} diff --git a/man/metadata_additions.Rd b/man/metadata_additions.Rd new file mode 100644 index 00000000..7e18849f --- /dev/null +++ b/man/metadata_additions.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta.R +\name{add_variables} +\alias{add_variables} +\alias{add_filters} +\title{Add variables to a tplyr_meta object} +\usage{ +add_variables(meta, names) + +add_filters(meta, filters) +} +\arguments{ +\item{meta}{A tplyr_meta object} + +\item{names}{A list of names, providing variable names of interest. Provide +as a list of quosures using \code{rlang::quos()}} + +\item{filters}{A list of symbols, providing variable names of interest. Provide +as a list of quosures using `rlang::quos()`} +} +\value{ +tplyr_meta object +} +\description{ +Add additional variable names to a \code{tplyr_meta()} object. +} +\examples{ + +m <- tplyr_meta() +m <- add_variables(m, rlang::quos(a, b, c)) +m <- add_filters(m, rlang::quos(a==1, b==2, c==3)) +m +} +\concept{Metadata additions} diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 00000000..1f8f237b --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-pipe.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\arguments{ +\item{lhs}{A value or the magrittr placeholder.} + +\item{rhs}{A function call using the magrittr semantics.} +} +\value{ +The result of calling `rhs(lhs)`. +} +\description{ +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +} +\keyword{internal} diff --git a/man/process_formatting.Rd b/man/process_formatting.Rd index 81389dfe..127115e1 100644 --- a/man/process_formatting.Rd +++ b/man/process_formatting.Rd @@ -12,7 +12,7 @@ process_formatting(x, ...) \item{...}{arguments passed to dispatch} } \value{ -The formatted_table object that is binded to the layer +The formatted_table object that is bound to the layer } \description{ This is an internal method, but is exported to support S3 dispatch. Not intended for direct use by a user. diff --git a/man/process_metadata.Rd b/man/process_metadata.Rd new file mode 100644 index 00000000..770c13b1 --- /dev/null +++ b/man/process_metadata.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build.R +\name{process_metadata} +\alias{process_metadata} +\title{Process layers to get metadata tables} +\usage{ +process_metadata(x, ...) 
+} +\arguments{ +\item{x}{A tplyr_layer object} + +\item{...}{arguments passed to dispatch} +} +\value{ +The formatted_meta object that is bound to the layer +} +\description{ +This is an internal method, but is exported to support S3 dispatch. Not intended for direct use by a user. +} +\keyword{internal} diff --git a/man/set_distinct_by.Rd b/man/set_distinct_by.Rd index 1138e8d1..1847802a 100644 --- a/man/set_distinct_by.Rd +++ b/man/set_distinct_by.Rd @@ -23,8 +23,9 @@ the by variables used to determine a distinct count. \details{ When a \code{distinct_by} value is set, distinct counts will be used by default. If you wish to combine distinct and not distinct counts, you can -choose which to display in your \code{\link{f_str}} objects using \code{n}, -\code{pct}, \code{distinct}, and \code{distinct_pct}. +choose which to display in your \code{f_str()} objects using \code{n}, +\code{pct}, \code{distinct_n}, and \code{distinct_pct}. Additionally, denominators +may be presented using \code{total} and \code{distinct_total}. } \examples{ #Load in pipe diff --git a/man/set_format_strings.Rd b/man/set_format_strings.Rd index b41e935d..b1828bde 100644 --- a/man/set_format_strings.Rd +++ b/man/set_format_strings.Rd @@ -23,7 +23,7 @@ on integer precision, and a 'dec' element for the cap on decimal precision.} \value{ The layer environment with the format string binding added - +tplyr_layer object with formats attached Returns the modified layer object. } @@ -42,7 +42,7 @@ Format strings are one of the most powerful components of 'Tplyr'. can, while still allowing flexibility to the user. In a count layer, you can simply provide a single \code{\link{f_str}} - object to specify how you want your n's (and possibly percents) formatted. + object to specify how you want your n's, percentages, and denominators formatted. If you are additionally supplying a statistic, like risk difference using \code{\link{add_risk_diff}}, you specify the count formats using the name 'n_counts'. The risk difference formats would then be specified using the diff --git a/man/set_numeric_threshold.Rd b/man/set_numeric_threshold.Rd new file mode 100644 index 00000000..846d5d08 --- /dev/null +++ b/man/set_numeric_threshold.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/count_bindings.R +\name{set_numeric_threshold} +\alias{set_numeric_threshold} +\title{Set a numeric cutoff} +\usage{ +set_numeric_threshold(e, numeric_cutoff, stat, column = NULL) +} +\arguments{ +\item{e}{A \code{count_layer} object} + +\item{numeric_cutoff}{A numeric value where only values greater than or equal +to the cutoff will be displayed.} + +\item{stat}{The statistic to use when filtering out rows. Either 'n', +'distinct_n', or 'pct' are allowable} + +\item{column}{If only a particular column should be used to cut off values, it +can be supplied here as a character value.} +} +\value{ +The modified Tplyr layer object +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +In certain tables, it may be necessary to only include rows that meet numeric +conditions. Rows that fall below a certain cutoff can be suppressed from +the output. This function allows you to pass a cutoff and a cutoff stat (n, +distinct_n, pct, or distinct_pct) to suppress values that are less than the +cutoff.
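For instance, a minimal sketch of the column argument, mirroring the mtcars-based tests added in this change set (the reading of "3" as the gear == 3 result column is inferred from those tests):

library(Tplyr)

# Only the counts in the gear == 3 result column are compared against
# the cutoff; cyl rows with n below 4 in that column are suppressed
mtcars %>%
  tplyr_table(gear) %>%
  add_layer(
    group_count(cyl) %>%
      set_numeric_threshold(4, "n", column = "3")
  ) %>%
  build()
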
+} +\examples{ +mtcars \%>\% +tplyr_table(gear) \%>\% + add_layer( + group_count(cyl) \%>\% + set_numeric_threshold(10, "n") \%>\% + add_total_row() \%>\% + set_order_count_method("bycount") + ) +} diff --git a/man/set_precision_data.Rd b/man/set_precision_data.Rd new file mode 100644 index 00000000..73a76bb3 --- /dev/null +++ b/man/set_precision_data.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/layer_bindings.R +\name{set_precision_data} +\alias{set_precision_data} +\title{Set precision data} +\usage{ +set_precision_data(layer, prec, default = c("error", "auto")) +} +\arguments{ +\item{layer}{A \code{tplyr_layer} object} + +\item{prec}{A dataframe following the structure specified in the function details} + +\item{default}{Handling of unspecified by variable groupings. Defaults to 'error'. Set to 'auto' to automatically infer any missing groups.} +} +\description{ +In some cases, there may be organizational standards surrounding decimal precision. +For example, there may be a specific standard around the representation of precision relating +to lab results. As such, \code{set_precision_data()} provides an interface to provide integer and +decimal precision from an external data source. +} +\details{ +The ultimate behavior of this feature is just that of the existing auto precision method, except +that the precision is specified in the provided precision dataset rather than inferred from the source data. +At a minimum, the precision dataset must contain the integer variables \code{max_int} and \code{max_dec}. If by variables +are provided, those variables must be available in the layer by variables. + +When the table is built, by default Tplyr will error if the precision dataset is missing by variable groupings +that exist in the target dataset. This can be overridden using the \code{default} parameter. If \code{default} is set to +"auto", any missing values will be automatically inferred from the source data. +} +\examples{ + +prec <- tibble::tribble( + ~vs, ~max_int, ~max_dec, + 0, 1, 1, + 1, 2, 2 +) + +tplyr_table(mtcars, gear) \%>\% + add_layer( + group_desc(wt, by = vs) \%>\% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd) + ) \%>\% + set_precision_data(prec) \%>\% + set_precision_on(wt) + ) \%>\% + build() + +} diff --git a/man/set_stats_as_columns.Rd b/man/set_stats_as_columns.Rd new file mode 100644 index 00000000..a0935f65 --- /dev/null +++ b/man/set_stats_as_columns.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/desc_bindings.R +\name{set_stats_as_columns} +\alias{set_stats_as_columns} +\title{Set descriptive statistics as columns} +\usage{ +set_stats_as_columns(e, stats_as_columns = TRUE) +} +\arguments{ +\item{e}{A \code{desc_layer} object on which descriptive statistics summaries should be represented as columns} + +\item{stats_as_columns}{Boolean to set stats as columns} +} +\value{ +The input tplyr_layer +} +\description{ +In many cases, treatment groups are represented as columns within a table. +But some tables call for a transposed presentation, where the treatment +groups are displayed by row, and the descriptive statistics are represented as +columns. \code{set_stats_as_columns()} allows Tplyr to output a built table +using this transposed format and deviate from the standard +representation of treatment groups as columns. +} +\details{ +This function leaves all specified by variables intact.
The only switch that +happens during the build process is that the provided descriptive statistics +are transposed as columns and the treatment variable is left as rows. Column +variables will remain represented as columns, and multiple target variables +will also be respected properly. +} +\examples{ + +dat <- tplyr_table(mtcars, gear) \%>\% + add_layer( + group_desc(wt, by = vs) \%>\% + set_format_strings( + "n" = f_str("xx", n), + "sd" = f_str("xx.x", sd, empty = c(.overall = "BLAH")), + "Median" = f_str("xx.x", median), + "Q1, Q3" = f_str("xx, xx", q1, q3), + "Min, Max" = f_str("xx, xx", min, max), + "Missing" = f_str("xx", missing) + ) \%>\% + set_stats_as_columns() + ) \%>\% + build() + +} diff --git a/man/str_indent_wrap.Rd b/man/str_indent_wrap.Rd new file mode 100644 index 00000000..301ec59b --- /dev/null +++ b/man/str_indent_wrap.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/str_indent_wrap.R +\name{str_indent_wrap} +\alias{str_indent_wrap} +\title{Wrap strings to a specific width with hyphenation while preserving +indentation} +\usage{ +str_indent_wrap(x, width = 10, tab_width = 5) +} +\arguments{ +\item{x}{An input character vector} + +\item{width}{The desired width of elements within the output character vector} + +\item{tab_width}{The number of spaces to which tabs should be converted} +} +\value{ +A character vector with string wrapping applied +} +\description{ +\code{str_indent_wrap()} leverages \code{stringr::str_wrap()} under the hood, but takes +some extra steps to preserve any indentation that has been applied to a +character element, and use hyphenated wrapping of single words that run +longer than the allotted wrapping width. +} +\details{ +The function \code{stringr::str_wrap()} is highly efficient, but in the +context of table creation there are two select features missing - hyphenation +for long running strings that overflow width, and respect for pre-indentation +of a character element. For example, in an adverse event table, you may have +body system rows as an un-indented column, and preferred terms as indented +columns. These strings may run long and require wrapping to not surpass the +column width. Furthermore, for crowded tables a single word may be longer +than the column width itself. + +This function takes steps to resolve these two issues, while trying to +minimize additional overhead required to apply the wrapping of strings. + +Note: This function automatically converts tabs to spaces. Tab width varies +depending on font, so width cannot automatically be determined within a data +frame. 
As such, users can specify the number of spaces to which tabs should be converted using the \code{tab_width} argument. +} +\examples{ +ex_text1 <- c("RENAL AND URINARY DISORDERS", " NEPHROLITHIASIS") +ex_text2 <- c("RENAL AND URINARY DISORDERS", "\tNEPHROLITHIASIS") + +cat(paste(str_indent_wrap(ex_text1, width=8), collapse="\n\n"),"\n") +cat(paste(str_indent_wrap(ex_text2, tab_width=4), collapse="\n\n"),"\n") +} diff --git a/man/tplyr_meta.Rd b/man/tplyr_meta.Rd new file mode 100644 index 00000000..92a223f0 --- /dev/null +++ b/man/tplyr_meta.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meta.R +\name{tplyr_meta} +\alias{tplyr_meta} +\title{Tplyr Metadata Object} +\usage{ +tplyr_meta(names = list(), filters = exprs()) +} +\arguments{ +\item{names}{List of symbols} + +\item{filters}{List of expressions} +} +\value{ +tplyr_meta object +} +\description{ +If a Tplyr table is built with the `metadata=TRUE` option specified, then +metadata is assembled behind the scenes to provide traceability on each +result cell derived. The functions `get_meta_result()` and +`get_meta_subset()` allow you to access that metadata by using an ID provided +in the row_id column and the column name of the result you'd like to access. +The purpose of the row_id variable instead of a simple row index is to +provide a sort resistant reference of the originating column, so the output +Tplyr table can be sorted in any order but the metadata are still easily +accessible. +} +\details{ +The `tplyr_meta` object provides a list with two elements - names and +filters. The names contain every column from the target data.frame of the +Tplyr table that factored into the specified result cell, and the filters +contains all the necessary filters to subset the target data to create the +specified result cell. `get_meta_subset()` additionally provides a parameter to +specify any additional columns you would like to include in the returned +subset data frame. +} +\examples{ + +tplyr_meta( + names = rlang::quos(x, y, z), + filters = rlang::quos(x == 1, y==2, z==3) + ) + +} diff --git a/tests/testthat/_snaps/apply_formats.md b/tests/testthat/_snaps/apply_formats.md new file mode 100644 index 00000000..2f4a7828 --- /dev/null +++ b/tests/testthat/_snaps/apply_formats.md @@ -0,0 +1,6 @@ +# apply_formats works correctly applies f_str() formatting + + Problem while computing `fmt_example = apply_formats("a (xx.a)", hp, wt)`. + Caused by error: + ! Auto-precision is not currently supported within the `apply_formats()` context + diff --git a/tests/testthat/_snaps/count.md b/tests/testthat/_snaps/count.md index 3bfc719f..b6c6f380 100644 --- a/tests/testthat/_snaps/count.md +++ b/tests/testthat/_snaps/count.md @@ -1,10 +1,10 @@ # Count layer clauses with invalid syntax give informative error group_count `where` condition `bad == code` is invalid. Filter error: - Error in `h()`: - ! Problem with `filter()` input `..1`. - i Input `..1` is `bad == code`. - x object 'bad' not found + Error in `filter()`: + ! Problem while computing `..1 = bad == code`. + Caused by error in `mask$eval_all_filter()`: + !
object 'bad' not found # Total rows and missing counts are displayed correctly(0.1.5 Updates) @@ -122,7 +122,7 @@ 3)), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame" )) -# nested count layers can accecpt text values in the first variable +# Nested count layers can accept text values in the first variable Inner layers must be data driven variables @@ -154,3 +154,156 @@ The number of values of your second variable must be greater than the number of levels in your first variable +# set_numeric_threshold works as expected + + Code + build(t1) + Output + # A tibble: 2 x 6 + row_label1 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1 + + 1 8 12 ( 80.0%) " 0 ( 0.0%)" " 2 ( 40.0%)" 1 0 + 2 Total 15 (100.0%) "12 (100.0%)" " 5 (100.0%)" 1 12 + +--- + + Code + build(t2) + Output + # A tibble: 3 x 6 + row_label1 var1_3 var1_4 var1_5 ord_layer_index ord_lay~1 + + 1 4 " 1 ( 6.7%)" " 8 ( 66.7%)" " 2 ( 40.0%)" 1 8 + 2 8 "12 ( 80.0%)" " 0 ( 0.0%)" " 2 ( 40.0%)" 1 0 + 3 Total "15 (100.0%)" "12 (100.0%)" " 5 (100.0%)" 1 12 + # ... with abbreviated variable name 1: ord_layer_1 + +--- + + Code + build(t3) + Output + # A tibble: 1 x 6 + row_label1 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1 + + 1 Total 15 (100.0%) 12 (100.0%) " 5 (100.0%)" 1 12 + +--- + + Code + build(t4) + Output + # A tibble: 0 x 2 + # ... with 2 variables: row_label1 , ord_layer_index + +--- + + Code + build(t5) + Output + # A tibble: 3 x 6 + row_label1 var1_3 var1_4 var1_5 ord_layer_index ord_lay~1 + + 1 4 " 1 ( 6.7%)" " 8 ( 66.7%)" " 2 ( 40.0%)" 1 8 + 2 8 "12 ( 80.0%)" " 0 ( 0.0%)" " 2 ( 40.0%)" 1 0 + 3 Total "15 (100.0%)" "12 (100.0%)" " 5 (100.0%)" 1 12 + # ... with abbreviated variable name 1: ord_layer_1 + +--- + + Code + build(t6) + Output + # A tibble: 2 x 6 + row_label1 var1_3 var1_4 var1_5 ord_layer_index ord_layer_1 + + 1 8 12 ( 80.0%) " 0 ( 0.0%)" " 2 ( 40.0%)" 1 0 + 2 Total 15 (100.0%) "12 (100.0%)" " 5 (100.0%)" 1 12 + +--- + + Code + build(t7) + Output + # A tibble: 9 x 8 + row_label1 row_l~1 var1_~2 var1_~3 var1_~4 ord_l~5 ord_l~6 ord_l~7 + + 1 GASTROINTESTINAL DISO~ "GASTR~ " 6 ( ~ " 6 ( ~ " 3 ( ~ 1 1 Inf + 2 GASTROINTESTINAL DISO~ " DI~ " 3 ( ~ " 1 ( ~ " 2 ( ~ 1 1 1 + 3 GENERAL DISORDERS AND~ "GENER~ "11 ( ~ "21 ( ~ "21 ( ~ 1 2 Inf + 4 GENERAL DISORDERS AND~ " AP~ " 4 ( ~ " 7 ( ~ " 5 ( ~ 1 2 1 + 5 INFECTIONS AND INFEST~ "INFEC~ " 5 ( ~ " 4 ( ~ " 3 ( ~ 1 3 Inf + 6 INFECTIONS AND INFEST~ " UP~ " 4 ( ~ " 1 ( ~ " 1 ( ~ 1 3 1 + 7 SKIN AND SUBCUTANEOUS~ "SKIN ~ " 7 ( ~ "21 ( ~ "26 ( ~ 1 4 Inf + 8 SKIN AND SUBCUTANEOUS~ " ER~ " 4 ( ~ " 3 ( ~ " 2 ( ~ 1 4 1 + 9 SKIN AND SUBCUTANEOUS~ " PR~ " 3 ( ~ " 8 ( ~ " 7 ( ~ 1 4 2 + # ... 
with abbreviated variable names 1: row_label2, 2: var1_Placebo, + # 3: `var1_Xanomeline High Dose`, 4: `var1_Xanomeline Low Dose`, + # 5: ord_layer_index, 6: ord_layer_1, 7: ord_layer_2 + +--- + + Code + build(t8) + Output + # A tibble: 9 x 8 + row_label1 row_l~1 var1_~2 var1_~3 var1_~4 ord_l~5 ord_l~6 ord_l~7 + + 1 GASTROINTESTINAL DISO~ "GASTR~ " 6 ( ~ " 6 ( ~ " 3 ( ~ 1 3 Inf + 2 GASTROINTESTINAL DISO~ " DI~ " 3 ( ~ " 1 ( ~ " 2 ( ~ 1 3 2 + 3 GENERAL DISORDERS AND~ "GENER~ "11 ( ~ "21 ( ~ "21 ( ~ 1 21 Inf + 4 GENERAL DISORDERS AND~ " AP~ " 4 ( ~ " 7 ( ~ " 5 ( ~ 1 21 5 + 5 INFECTIONS AND INFEST~ "INFEC~ " 5 ( ~ " 4 ( ~ " 3 ( ~ 1 3 Inf + 6 INFECTIONS AND INFEST~ " UP~ " 4 ( ~ " 1 ( ~ " 1 ( ~ 1 3 1 + 7 SKIN AND SUBCUTANEOUS~ "SKIN ~ " 7 ( ~ "21 ( ~ "26 ( ~ 1 26 Inf + 8 SKIN AND SUBCUTANEOUS~ " ER~ " 4 ( ~ " 3 ( ~ " 2 ( ~ 1 26 2 + 9 SKIN AND SUBCUTANEOUS~ " PR~ " 3 ( ~ " 8 ( ~ " 7 ( ~ 1 26 7 + # ... with abbreviated variable names 1: row_label2, 2: var1_Placebo, + # 3: `var1_Xanomeline High Dose`, 4: `var1_Xanomeline Low Dose`, + # 5: ord_layer_index, 6: ord_layer_1, 7: ord_layer_2 + +# denom and distinct_denom values work as expected + + Code + build(t1) + Output + # A tibble: 5 x 6 + row_label1 var1_3 var1_4 var1_5 ord_layer~1 ord_l~2 + + 1 4 " 1/ 15 ( 6.7)" " 8/ 12 (66.7)" " 2/ 5 (40.0)" 1 8 + 2 6 " 2/ 15 (13.3)" " 4/ 12 (33.3)" " 1/ 5 (20.0)" 1 4 + 3 8 "12/ 15 (80.0)" " 0/ 12 ( 0.0)" " 2/ 5 (40.0)" 1 0 + 4 Missing " 0" " 0" " 0" 1 0 + 5 Total " 15 [100.0]" " 12 [100.0]" " 5 [100.0]" 1 12 + # ... with abbreviated variable names 1: ord_layer_index, 2: ord_layer_1 + +--- + + Code + build(t2) + Output + # A tibble: 3 x 6 + row_label1 var1_3 var1_4 var1_5 ord_l~1 ord_l~2 + + 1 4 " 1 1 1 15" " 2 2 8 12" " 1 1 2 ~ 1 1 + 2 6 " 1 1 2 15" " 2 2 4 12" " 1 1 1 ~ 1 2 + 3 8 " 1 1 12 15" " 0 2 0 12" " 1 1 2 ~ 1 3 + # ... with abbreviated variable names 1: ord_layer_index, 2: ord_layer_1 + +# denoms with distinct population data populates as expected + + Code + tab + Output + # A tibble: 1 x 8 + row_label1 var1_Dosed var1_Plac~1 var1_~2 var1_~3 var1_~4 ord_l~5 ord_l~6 + + 1 Any Body System 93 (55.4%) 32 (37.2%) 125 (4~ 43 (51~ 50 (59~ 1 NA + # ... with abbreviated variable names 1: var1_Placebo, 2: var1_Total, + # 3: `var1_Xanomeline High Dose`, 4: `var1_Xanomeline Low Dose`, + # 5: ord_layer_index, 6: ord_layer_1 + +# nested count layers error out when you try to add a total row + + You can't include total rows in nested counts. Instead, add a seperate layer for total counts. + diff --git a/tests/testthat/_snaps/desc.md b/tests/testthat/_snaps/desc.md index ffd5558a..8607c166 100644 --- a/tests/testthat/_snaps/desc.md +++ b/tests/testthat/_snaps/desc.md @@ -1,9 +1,30 @@ # Desc layer clauses with invalid syntax give informative error group_desc `where` condition `bad == code` is invalid. Filter error: - Error in `h()`: - ! Problem with `filter()` input `..1`. - i Input `..1` is `bad == code`. - x object 'bad' not found + Error in `filter()`: + ! Problem while computing `..1 = bad == code`. + Caused by error in `mask$eval_all_filter()`: + ! 
object 'bad' not found +# Stats as columns properly transposes the built data + + # A tibble: 3 x 7 + row_label1 var1_n var1_sd var2_n var2_sd ord_layer_index ord_layer_1 + + 1 3 "15" " 0.8" "15" " 0.3" 1 1 + 2 4 "12" " 0.6" "12" " 0.3" 1 2 + 3 5 " 5" " 0.8" " 5" " 0.4" 1 3 + +--- + + # A tibble: 3 x 11 + row_label1 var1_n_0 var1_sd_0 var1_n_1 var1_~1 var2_~2 var2_~3 var2_~4 var2_~5 + + 1 3 "15" " 0.8" "" "BLAH" "15" " 0.3" "" "BLAH" + 2 4 " 4" " 0.2" " 8" " 0.5" " 4" " 0.1" " 8" " 0.3" + 3 5 "" "BLAH" " 5" " 0.8" "" "BLAH" " 5" " 0.4" + # ... with 2 more variables: ord_layer_index , ord_layer_1 , and + # abbreviated variable names 1: var1_sd_1, 2: var2_n_0, 3: var2_sd_0, + # 4: var2_n_1, 5: var2_sd_1 + diff --git a/tests/testthat/_snaps/functional.md b/tests/testthat/_snaps/functional.md index 0b6afe18..441b8cdb 100644 --- a/tests/testthat/_snaps/functional.md +++ b/tests/testthat/_snaps/functional.md @@ -1,8 +1,6 @@ # all test tables can be built without errors or warnings - Problem with `mutate()` column `col_i`. - i `col_i = fct_expand(...)`. - x object 'col_i' not found + Problem while computing `col_i = fct_expand(...)`. Caused by error: ! object 'col_i' not found diff --git a/tests/testthat/_snaps/layer_templates.md b/tests/testthat/_snaps/layer_templates.md new file mode 100644 index 00000000..2bbfc7bb --- /dev/null +++ b/tests/testthat/_snaps/layer_templates.md @@ -0,0 +1,63 @@ +# Template errors correctly upon creation + + Invalid template - templates must start with an ellipsis (i.e. ...) passed to either group_count, group_desc, or group_shift. For example, group_count(...) + +--- + + Invalid template - templates must start with an ellipsis (i.e. ...) passed to either group_count, group_desc, or group_shift. For example, group_count(...) + +--- + + Functions called within `add_layer` must be part of `Tplyr` + +# Template errors correctly upon execution + + Template bad does not exist + +--- + + Arguments must be passed to `add_params` in a list. + +--- + + Arguments must be passed to `add_params` in a list. + +--- + + Arguments pass in `add_params` must be named + +--- + + Invalid template - templates must be created using `new_layer_template()` + +--- + + In use_template() the following parameters provided to add_params are invalid: test + +--- + + In use_template() the following parameters provided to add_params are missing: sort_col + +# Templates print appropriately + + $test1 + Template name: test1 + Template parameters: None + Template code: + { + group_count(...) %>% set_format_strings(f_str("xx (xx.x%)", n, pct)) + } + + $test2 + Template name: test2 + Template parameters: sort_meth, sort_col + Template code: + { + group_count(...) %>% set_format_strings(f_str("xx (xx.x%)", n, pct)) %>% set_order_count_method({ + sort_meth + }) %>% set_ordering_cols({ + sort_col + }) + } + + diff --git a/tests/testthat/_snaps/meta.md b/tests/testthat/_snaps/meta.md new file mode 100644 index 00000000..7c295a1c --- /dev/null +++ b/tests/testthat/_snaps/meta.md @@ -0,0 +1,67 @@ +# Metadata creation errors generate properly + + meta must be a tplyr_meta object + +--- + + meta must be a tplyr_meta object + +--- + + Filters must be provided as a list of calls + +--- + + Filters must be provided as a list of calls + +--- + + Names must be provided as a list of names + +--- + + Names must be provided as a list of names + +# Metadata extraction and extension error properly + + t must be a tplyr_table object + +--- + + t does not contain a metadata dataframe. 
Make sure the tplyr_table was built with `build(metadata=TRUE)` + +--- + + The provided metadata dataset must have a column named row_id + +--- + + row_id values in the provided metadata dataset are duplicates of row_id values in the Tplyr metadata. All row_id values must be unique. FALSE + +# Metadata extraction and extension work properly + + Code + get_metadata(t) + Output + # A tibble: 7 x 5 + row_id row_label1 var1_3 var1_4 var1_5 + + 1 d1_1 n + 2 d2_1 Mean (SD) + 3 d3_1 Median + 4 d4_1 Q1, Q3 + 5 d5_1 Min, Max + 6 d6_1 Missing + 7 x1_1 + +# Metadata print method is accurate + + Code + print(x) + Output + tplyr_meta: 3 names, 4 filters + Names: + a, b, c + Filters: + a == 1, b == 2, c == 3, x == "a" + diff --git a/tests/testthat/_snaps/meta_utils.md b/tests/testthat/_snaps/meta_utils.md new file mode 100644 index 00000000..cf86e38e --- /dev/null +++ b/tests/testthat/_snaps/meta_utils.md @@ -0,0 +1,28 @@ +# Metadata extractors error properly + + Invalid row_id selected. row_id must be provided as a string present in built Tplyr table. + +--- + + Invalid row_id selected. row_id must be provided as a string present in built Tplyr table. + +--- + + column must provided as a character string and a valid result column present in the built Tplyr dataframe + +--- + + column must provided as a character string and a valid result column present in the built Tplyr dataframe + +--- + + Specified column must be a result column + +--- + + add_cols must be provided using `dplyr::vars()` + +--- + + If querying metadata without a tplyr_table, a target must be provided + diff --git a/tests/testthat/_snaps/precision.md b/tests/testthat/_snaps/precision.md new file mode 100644 index 00000000..1c2ce44a --- /dev/null +++ b/tests/testthat/_snaps/precision.md @@ -0,0 +1,65 @@ +# Missing by variables are handled as specified in precision data + + The precision data provided is missing by variable cases: + vs + Datsun 710 1 + +--- + + The precision data provided is missing by variable cases: + vs + Datsun 710 1 + +--- + + 'arg' should be one of "error", "auto" + +--- + + # A tibble: 12 x 8 + row_label1 row_label2 var1_3 var1_4 var1_5 ord_l~1 ord_l~2 ord_l~3 + + 1 0 n " 12" " 2" " 4" 1 1 1 + 2 0 Mean (SD) "4.10 (0.768)" "2.75~ "2.91~ 1 1 2 + 3 0 Median "3.81" "2.75" "2.97" 1 1 3 + 4 0 Q1, Q3 "3.56, 4.36" "2.68~ "2.61~ 1 1 4 + 5 0 Min, Max "3.4, 5.4" "2.6,~ "2.1,~ 1 1 5 + 6 0 Missing " 0" " 0" " 0" 1 1 6 + 7 1 n " 3" " 10" " 1" 1 2 1 + 8 1 Mean (SD) "3.0467 (0.51842~ "2.59~ "1.51~ 1 2 2 + 9 1 Median "3.2150" "2.55~ "1.51~ 1 2 3 + 10 1 Q1, Q3 "2.8400, 3.3375" "2.00~ "1.51~ 1 2 4 + 11 1 Min, Max "2.465, 3.460" "1.61~ "1.51~ 1 2 5 + 12 1 Missing " 0" " 0" " 0" 1 2 6 + # ... 
with abbreviated variable names 1: ord_layer_index, 2: ord_layer_1, + # 3: ord_layer_2 + +# Data validation for external precision data works effectively + + Precision dataset must include the variables max_int and max_dec + +--- + + Precision dataset must include the variables max_int and max_dec + +--- + + max_int and max_dec in precision dataset must be valid integer values + +--- + + max_int and max_dec in precision dataset must be valid integer values + +--- + + By variable types mismatch between precision dataset and target data + +# Partially provided decimal precision caps populate correctly + + # A tibble: 3 x 3 + var1_Placebo `var1_Xanomeline High Dose` `var1_Xanomeline Low Dose` + + 1 322.2 ( 65.0) 298.8 ( 55.5) 287.1 ( 76.8) + 2 322.223 (64.969) 298.849 (55.543) 287.149 (76.822) + 3 322.2 (65.0) 298.8 (55.5) 287.1 (76.8) + diff --git a/tests/testthat/_snaps/riskdiff.md b/tests/testthat/_snaps/riskdiff.md index c4bd7621..7a376ad0 100644 --- a/tests/testthat/_snaps/riskdiff.md +++ b/tests/testthat/_snaps/riskdiff.md @@ -18,3 +18,7 @@ Invalid format names supplied. Count layers only accept the following format names: n_counts, riskdiff +# Error generates when duplicating riskdiff comparison values + + Comparison {4, 4} has duplicated values. Comparisons must not be duplicates + diff --git a/tests/testthat/_snaps/shift.md b/tests/testthat/_snaps/shift.md index 7ff62c37..713497ce 100644 --- a/tests/testthat/_snaps/shift.md +++ b/tests/testthat/_snaps/shift.md @@ -1,9 +1,9 @@ # Shift layer clauses with invalid syntax give informative error group_shift `where` condition `bad == code` is invalid. Filter error: - Error in `h()`: - ! Problem with `filter()` input `..1`. - i Input `..1` is `bad == code`. - x object 'bad' not found + Error in `filter()`: + ! Problem while computing `..1 = bad == code`. + Caused by error in `mask$eval_all_filter()`: + ! object 'bad' not found diff --git a/tests/testthat/_snaps/table.md b/tests/testthat/_snaps/table.md index 1340c97a..084879b4 100644 --- a/tests/testthat/_snaps/table.md +++ b/tests/testthat/_snaps/table.md @@ -5,18 +5,18 @@ # Table level where clauses with invalid syntax give informative error tplyr_table `where` condition `bad == code` is invalid. Filter error: - Error in `h()`: - ! Problem with `filter()` input `..1`. - i Input `..1` is `bad == code`. - x object 'bad' not found + Error in `filter()`: + ! Problem while computing `..1 = bad == code`. + Caused by error in `mask$eval_all_filter()`: + ! object 'bad' not found # Population data where clauses with invalid syntax give informative error Population data `pop_where` condition `bad == code` is invalid. Filter error: - Error in `h()`: - ! Problem with `filter()` input `..1`. - i Input `..1` is `bad == code`. - x object 'bad' not found + Error in `filter()`: + ! Problem while computing `..1 = bad == code`. + Caused by error in `mask$eval_all_filter()`: + ! object 'bad' not found If the population data and target data subsets should be different, use `set_pop_where`. diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md index cedc4256..aab2a87f 100644 --- a/tests/testthat/_snaps/utils.md +++ b/tests/testthat/_snaps/utils.md @@ -1,8 +1,12 @@ -# Call must be quoted +# add_layer only accepts Tplyr functions - `call` must be a defused call, not a number. 
+ Functions called within `add_layer` must be part of `Tplyr` + +--- -# By default, only `Tplyr` exported functions are allowed + Functions called within `add_layer` must be part of `Tplyr` + +--- Functions called within `add_layer` must be part of `Tplyr` diff --git a/tests/testthat/adlb.Rdata b/tests/testthat/adlb.Rdata new file mode 100644 index 00000000..19db3cf6 Binary files /dev/null and b/tests/testthat/adlb.Rdata differ diff --git a/tests/testthat/adsl.Rdata b/tests/testthat/adsl.Rdata new file mode 100644 index 00000000..b7b8f9a8 Binary files /dev/null and b/tests/testthat/adsl.Rdata differ diff --git a/tests/testthat/t_cap_comp.Rdata b/tests/testthat/t_cap_comp.Rdata new file mode 100644 index 00000000..953d28b3 Binary files /dev/null and b/tests/testthat/t_cap_comp.Rdata differ diff --git a/tests/testthat/t_uncap_comp.Rdata b/tests/testthat/t_uncap_comp.Rdata new file mode 100644 index 00000000..c6ffa08b Binary files /dev/null and b/tests/testthat/t_uncap_comp.Rdata differ diff --git a/tests/testthat/test-apply_formats.R b/tests/testthat/test-apply_formats.R new file mode 100644 index 00000000..f97f4286 --- /dev/null +++ b/tests/testthat/test-apply_formats.R @@ -0,0 +1,19 @@ +test_that("apply_formats works correctly applies f_str() formatting", { + out <- mtcars %>% + head(10) %>% + mutate( + fmt_example = apply_formats('xxx (xx.x)', hp, wt) + ) + + comp <- c("110 ( 2.6)", "110 ( 2.9)", " 93 ( 2.3)", "110 ( 3.2)", "175 ( 3.4)", + "105 ( 3.5)", "245 ( 3.6)", " 62 ( 3.2)", " 95 ( 3.1)", "123 ( 3.4)") + expect_equal(out$fmt_example, comp) + + expect_snapshot_error({ + mtcars %>% + head(10) %>% + mutate( + fmt_example = apply_formats('a (xx.a)', hp, wt) + ) + }) +}) diff --git a/tests/testthat/test-count.R b/tests/testthat/test-count.R index 9aadeb15..c73f09c4 100644 --- a/tests/testthat/test-count.R +++ b/tests/testthat/test-count.R @@ -106,40 +106,6 @@ t19 <- add_layers(t19, c19) t20 <- add_layers(t20, c20) test_that("Count layers are built as expected", { - expect_setequal(names(c1), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) - expect_setequal(names(c2), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) - expect_setequal(names(c3), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) - expect_setequal(names(c4), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers", - "format_strings")) - expect_setequal(names(c5), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers", - "include_total_row", "denoms_by", - "total_count_format", - "total_row_sort_value", "count_missings")) - expect_setequal(names(c6), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers", - "distinct_by")) - expect_setequal(names(c7), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) - expect_setequal(names(c8), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers", - "distinct_by", "format_strings")) - expect_setequal(names(c9), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers", - "indentation")) - expect_setequal(names(c10), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers", - "count_row_prefix")) - expect_setequal(names(c11), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers", - "ordering_cols")) - expect_setequal(names(c12), c("by", "stats", "precision_on", 
"where", - "target_var", "precision_by", "layers", - "format_strings", "result_order_var", "distinct_by")) expect_equal(unname(map_chr(c1$by, as_name)), character()) expect_equal(unname(map_chr(c2$by, as_name)), "am") @@ -200,16 +166,16 @@ test_that("Count layers are summarized without errors and warnings", { test_that("Count layers are processed as expected", { - expect_equal(dim(c1$numeric_data), c(9, 4)) - expect_equal(dim(c2$numeric_data), c(18, 5)) - expect_equal(dim(c3$numeric_data), c(36, 6)) - expect_equal(dim(c4$numeric_data), c(36, 6)) - expect_equal(dim(c5$numeric_data), c(39, 6)) + expect_equal(dim(c1$numeric_data), c(9, 6)) + expect_equal(dim(c2$numeric_data), c(18, 7)) + expect_equal(dim(c3$numeric_data), c(36, 8)) + expect_equal(dim(c4$numeric_data), c(36, 8)) + expect_equal(dim(c5$numeric_data), c(39, 8)) expect_equal(dim(c6$numeric_data), c(9, 6)) - expect_equal(dim(c7$numeric_data), c(27, 5)) + expect_equal(dim(c7$numeric_data), c(27, 7)) expect_equal(dim(c8$numeric_data), c(9, 6)) - expect_equal(dim(c9$numeric_data), c(27, 5)) - expect_equal(dim(c10$numeric_data), c(9, 4)) + expect_equal(dim(c9$numeric_data), c(27, 7)) + expect_equal(dim(c10$numeric_data), c(9, 6)) expect_type(c1$numeric_data$n, "double") expect_type(c2$numeric_data$n, "double") @@ -222,17 +188,6 @@ test_that("Count layers are processed as expected", { expect_type(c9$numeric_data$n, "double") expect_type(c10$numeric_data$n, "double") - expect_equal(dim(c1$formatted_data), c(3, 5)) - expect_equal(dim(c2$formatted_data), c(6, 7)) - expect_equal(dim(c3$formatted_data), c(12, 9)) - expect_equal(dim(c4$formatted_data), c(12, 9)) - expect_equal(dim(c5$formatted_data), c(13, 9)) - expect_equal(dim(c6$formatted_data), c(3, 5)) - expect_equal(dim(c7$formatted_data), c(9, 7)) - expect_equal(dim(c8$formatted_data), c(3, 5)) - expect_equal(dim(c9$formatted_data), c(9, 7)) - expect_equal(dim(c10$formatted_data), c(3, 5)) - expect_true(all(nchar(unlist(c1$formatted_data[, 2:4])) == 11)) expect_true(all(nchar(unlist(c2$formatted_data[, 3:5])) == 11)) expect_true(all(nchar(unlist(c3$formatted_data[, 4:6])) == 11)) @@ -323,7 +278,7 @@ test_that("missing counts can be displayed as expected", { test_that("Count layer clauses with invalid syntax give informative error", { t <- tplyr_table(mtcars, gear) %>% add_layer( - group_count(am, where=bad == code) + group_count(am, where = bad == code) ) expect_snapshot_error(build(t)) @@ -331,9 +286,6 @@ test_that("Count layer clauses with invalid syntax give informative error", { test_that("Nested count layers can be built with text by variables", { - expect_equal(dim(c13$numeric_data), c(27, 6)) - expect_equal(dim(c13$formatted_data), c(9, 9)) - expect_equal(c13$formatted_data$ord_layer_2, rep(2, 9)) }) @@ -524,7 +476,7 @@ test_that("distinct is changed to distinct_n with a warning", { }) -test_that("nested count layers can accecpt text values in the first variable", { +test_that("Nested count layers can accept text values in the first variable", { t <- tplyr_table(mtcars, gear) %>% add_layer( group_count(vars("All Cyl", cyl)) @@ -575,7 +527,7 @@ test_that("Variable names will be coersed into symbols", { expect_snapshot_warning(build(t2)) }) -test_that("nested count layers can be build with character value in first position and risk difference", { +test_that("nested count layers can be built with character value in first position and risk difference", { suppressWarnings({ t1 <- tplyr_table(mtcars, gear) %>% add_layer( @@ -640,7 +592,7 @@ test_that("nested count layers can be built 
with restrictive where logic", { t <- tplyr_table(mtcars, gear) %>% add_layer( - group_count(vars(cyl, grp), where = grp == "grp.8.5")%>% + group_count(vars(cyl, grp), where = grp == "grp.8.5") %>% set_nest_count(TRUE) %>% set_order_count_method('bycount') %>% set_ordering_cols("3") @@ -716,7 +668,7 @@ test_that("test IBM rounding option", { group_count(gender, by = "Gender") %>% set_format_strings(f_str("xxx (xxx%)", n, pct)) ) - expect_warning(tabl2 <- build(tabl2), "You have enabled IBM Rounding.") + expect_warning({tabl2 <- build(tabl2)}, "You have enabled IBM Rounding.") expect_equal(tabl2$var1_Placebo, c("485 ( 49%)", "515 ( 52%)")) @@ -739,8 +691,10 @@ test_that("Posix columns don't cause the build to error out.", { # load(test_path("adae.Rdata")) - adsl <- haven::read_xpt(test_path("adsl.xpt")) %>% - mutate(fake_dttm = as.POSIXct("2019-01-01 10:10:10"), origin = "1970-01-01") %>% + load(test_path("adsl.Rdata")) + + adsl <- adsl %>% + mutate(fake_dttm = as.POSIXct("2019-01-01 10:10:10"), origin = "1970-01-01") %>% rename(TRTA = TRT01A) tp_obj <- tplyr_table(adae, TRTA) %>% @@ -753,3 +707,148 @@ test_that("Posix columns don't cause the build to error out.", { expect_silent(build(tp_obj)) }) + +test_that("set_numeric_threshold works as expected", { + + t1 <- mtcars %>% + tplyr_table(gear) %>% + add_layer( + group_count(cyl) %>% + set_numeric_threshold(10, "n") %>% + add_total_row() %>% + set_order_count_method("bycount") + ) + + expect_snapshot(build(t1)) + + t2 <- mtcars %>% + tplyr_table(gear) %>% + add_layer( + group_count(cyl) %>% + set_numeric_threshold(5.1, "n") %>% + add_total_row() %>% + set_order_count_method("bycount") + ) + + expect_snapshot(build(t2)) + + t3 <- mtcars %>% + tplyr_table(gear) %>% + add_layer( + group_count(cyl) %>% + set_numeric_threshold(13, "n") %>% + add_total_row() %>% + set_order_count_method("bycount") + ) + + expect_snapshot(build(t3)) + + t4 <- mtcars %>% + tplyr_table(gear) %>% + add_layer( + group_count(cyl) %>% + set_numeric_threshold(16, "n") %>% + add_total_row() %>% + set_order_count_method("bycount") + ) + + expect_snapshot(build(t4)) + + t5 <- mtcars %>% + tplyr_table(gear) %>% + add_layer( + group_count(cyl) %>% + set_numeric_threshold(0.5, "pct") %>% + add_total_row() %>% + set_order_count_method("bycount") + ) + + expect_snapshot(build(t5)) + + t6 <- mtcars %>% + tplyr_table(gear) %>% + add_layer( + group_count(cyl) %>% + set_numeric_threshold(4, "n", "3") %>% + add_total_row() %>% + set_order_count_method("bycount") + ) + + expect_snapshot(build(t6)) + + load(test_path("adae.Rdata")) + + t7 <- adae %>% + tplyr_table(TRTA) %>% + add_layer( + group_count(vars(AEBODSYS, AEDECOD)) %>% + set_numeric_threshold(3, "n", "Placebo") + ) + + expect_snapshot(build(t7)) + + t8 <- adae %>% + tplyr_table(TRTA) %>% + add_layer( + group_count(vars(AEBODSYS, AEDECOD)) %>% + set_numeric_threshold(3, "n", "Placebo") %>% + set_order_count_method("bycount") + ) + + expect_snapshot(build(t8)) +}) + +test_that("denom and distinct_denom values work as expected", { + + + t1 <- tplyr_table(mtcars2, gear) %>% + add_layer( + group_count(cyl) %>% + set_missing_count(f_str("xx", n), Missing = NA) %>% + add_total_row(f_str("xxxxx [xx.x]", n, pct)) %>% + set_format_strings(f_str("xx/xxx (xx.x)", n, total, pct)) %>% + set_order_count_method("bycount") + ) + + expect_snapshot(build(t1)) + + t2 <- tplyr_table(mtcars, gear) %>% + add_layer( + group_count(cyl) %>% + set_distinct_by(am) %>% + set_format_strings(f_str("xxx xxx xxx xxx", distinct_n, distinct_total, n, total)) + 
) + + expect_snapshot(build(t2)) +}) + +test_that("denoms with distinct population data populates as expected", { + load(test_path("adae.Rdata")) + load(test_path("adsl.Rdata")) + + tab <- tplyr_table(adae, TRTA) %>% + set_pop_data(adsl) %>% + set_pop_treat_var(TRT01A) %>% + add_total_group %>% + add_treat_grps(Dosed = c("Xanomeline High Dose", "Xanomeline Low Dose")) %>% + add_layer( + group_count("Any Body System") %>% + set_distinct_by(USUBJID) %>% + set_format_strings(f_str("xx (xx.x%)", distinct_n, distinct_pct)) + ) %>% + build() + + expect_snapshot(tab) +}) + +test_that("nested count layers error out when you try to add a total row", { + + # GH issue 92 + tab <- tplyr_table(mtcars, am) %>% + add_layer( + group_count(vars(cyl, grp)) %>% + add_total_row() + ) + + expect_snapshot_error(build(tab)) +}) diff --git a/tests/testthat/test-desc.R b/tests/testthat/test-desc.R index 29bb67a4..723a5f61 100644 --- a/tests/testthat/test-desc.R +++ b/tests/testthat/test-desc.R @@ -39,31 +39,6 @@ t6 <- add_layers(t6, d6) t7 <- add_layers(t7, d7) t8 <- add_layers(t8, d8) -test_that("group_desc are built as expected", { - expect_setequal(names(d1), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) - expect_setequal(names(d2), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) - expect_setequal(names(d3), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) - expect_setequal(names(d4), c("max_length", "format_strings", "by", "cap", - "row_labels", "stats", "keep_vars", - "precision_on", "custom_summaries", "where", - "target_var", "need_prec_table", "precision_by", - "trans_vars", "summary_vars", "layers" )) - expect_setequal(names(d5), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) - expect_setequal(names(d6), c("max_length", "format_strings", "by", "cap", - "row_labels", "stats", "keep_vars", - "precision_on", "custom_summaries", "where", - "target_var", "need_prec_table", "precision_by", - "trans_vars", "summary_vars", "layers")) - expect_setequal(names(d7), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) - expect_setequal(names(d8), c("by", "stats", "precision_on", "where", - "target_var", "precision_by", "layers")) -}) - test_that("Group_desc can be created without warnings and errors", { expect_silent(build(t1)) expect_silent(build(t2)) @@ -75,97 +50,6 @@ test_that("Group_desc can be created without warnings and errors", { expect_silent(build(t8)) }) -test_that("group_desc are processed as expected", { - - expect_setequal(names(d1), c("where", "need_prec_table", "built_target", - "summary_vars", "formatted_data", "trans_vars", - "target_var", "keep_vars", "cap", "precision_on", - "max_length", "layers", "row_labels", "by", - "precision_by", "stats", "numeric_data", - "format_strings", "prec")) - expect_setequal(names(d2), c("where", "need_prec_table", "built_target", - "summary_vars", "formatted_data", "trans_vars", - "target_var", "keep_vars", "cap", "precision_on", - "max_length", "layers", "row_labels", "by", - "precision_by", "stats", "numeric_data", - "format_strings", "prec")) - expect_setequal(names(d3), c("where", "need_prec_table", "built_target", - "summary_vars", "formatted_data", "trans_vars", - "target_var", "keep_vars", "cap", "precision_on", - "max_length", "layers", "row_labels", "by", - "precision_by", "stats", "numeric_data", - "format_strings", "prec")) - expect_setequal(names(d4), c("where", 
"need_prec_table", "built_target", - "summary_vars", "formatted_data", "trans_vars", - "target_var", "keep_vars", "cap", "precision_on", - "max_length", "layers", "row_labels", "by", - "precision_by", "stats", "numeric_data", - "format_strings", "custom_summaries")) - expect_setequal(names(d5), c("where", "need_prec_table", "built_target", - "summary_vars", "formatted_data", "trans_vars", - "target_var", "keep_vars", "cap", "precision_on", - "max_length", "layers", "row_labels", "by", - "precision_by", "stats", "numeric_data", - "format_strings", "prec")) - expect_setequal(names(d6), c("where", "need_prec_table", "built_target", - "summary_vars", "formatted_data", "trans_vars", - "target_var", "keep_vars", "cap", "precision_on", - "max_length", "layers", "row_labels", "by", - "precision_by", "stats", "numeric_data", - "format_strings", "custom_summaries")) - expect_setequal(names(d7), c("where", "need_prec_table", "built_target", - "summary_vars", "formatted_data", "trans_vars", - "target_var", "keep_vars", "cap", "precision_on", - "max_length", "layers", "row_labels", "by", - "precision_by", "stats", "numeric_data", - "format_strings", "prec")) - expect_setequal(names(d7), c("where", "need_prec_table", "built_target", - "summary_vars", "formatted_data", "trans_vars", - "target_var", "keep_vars", "cap", "precision_on", - "max_length", "layers", "row_labels", "by", - "precision_by", "stats", "numeric_data", - "format_strings", "prec")) - - expect_equal(dim(d1$numeric_data), c(27, 4)) - expect_equal(dim(d2$numeric_data), c(54, 5)) - expect_equal(dim(d3$numeric_data), c(108, 6)) - expect_equal(dim(d4$numeric_data), c(3, 4)) - expect_equal(dim(d5$numeric_data), c(54, 4)) - expect_equal(dim(d6$numeric_data), c(6, 4)) - expect_equal(dim(d7$numeric_data), c(54, 5)) - expect_equal(dim(d8$numeric_data), c(324, 6)) - - - expect_type(d1$numeric_data$value, "double") - expect_type(d2$numeric_data$value, "double") - expect_type(d3$numeric_data$value, "double") - expect_type(d4$numeric_data$value, "double") - expect_type(d5$numeric_data$value, "double") - expect_type(d6$numeric_data$value, "double") - expect_type(d7$numeric_data$value, "double") - expect_type(d8$numeric_data$value, "double") - - expect_equal(dim(d1$formatted_data), c(6, 5)) - expect_equal(dim(d2$formatted_data), c(12, 7)) - expect_equal(dim(d3$formatted_data), c(24, 9)) - expect_equal(dim(d4$formatted_data), c(1, 5)) - expect_equal(dim(d5$formatted_data), c(6, 8)) - expect_equal(dim(d6$formatted_data), c(1, 8)) - expect_equal(dim(d7$formatted_data), c(6, 8)) - expect_equal(dim(d8$formatted_data), c(36, 10)) - - - expect_true(!any(is.na(unlist(d1$formatted_data[, 2:4])))) - expect_true(!any(is.na(unlist(d2$formatted_data[, 2:4])))) - expect_true(!any(is.na(unlist(d3$formatted_data[, 4:6])))) - expect_true(!any(is.na(unlist(d4$formatted_data[, 2:4])))) - expect_true(!any(is.na(unlist(d5$formatted_data[, 2:7])))) - expect_true(!any(is.na(unlist(d6$formatted_data[, 2:7])))) - expect_true(!any(is.na(unlist(d7$formatted_data[, 2:7])))) - expect_true(!any(is.na(unlist(d8$formatted_data[, 3:8])))) - -}) - test_that("Auto precision builds correctly", { t_uncap <- tplyr_table(mtcars_long, gear) %>% @@ -180,8 +64,7 @@ test_that("Auto precision builds correctly", { 'Missing' = f_str('xxx', missing) ) ) %>% - build() %>% - mutate_at(vars(starts_with('var')), ~ str_trim(.x)) # Reading in the CSV removes leading spaces + build() t_cap <- tplyr_table(mtcars_long, gear) %>% add_layer( @@ -196,11 +79,10 @@ test_that("Auto precision builds correctly", { 
cap = c('int'=3, 'dec'=2) ) ) %>% - build() %>% - mutate_at(vars(starts_with('var')), ~ str_trim(.x)) # Reading in the CSV removes leading spaces + build() - t_uncap_comp <- readr::read_csv('t_uncap.csv') - t_cap_comp <- readr::read_csv('t_cap.csv') + load(test_path("t_uncap_comp.Rdata")) + load(test_path("t_cap_comp.Rdata")) expect_equal(mutate_all(t_uncap, as.character), mutate_all(t_uncap_comp, as.character), ignore_attr = TRUE) @@ -217,3 +99,47 @@ test_that("Desc layer clauses with invalid syntax give informative error", { expect_snapshot_error(build(t)) }) + +test_that("Stats as columns properly transposes the built data", { + + t1 <- tplyr_table(mtcars, gear) %>% + add_layer( + group_desc(vars(wt, drat)) %>% + set_format_strings( + "n" = f_str("xx", n), + "sd" = f_str("xx.x", sd, empty = c(.overall = "BLAH")) + ) %>% + set_stats_as_columns() + ) + + expect_silent(build(t1)) + + d1 <- build(t1) + + # Make sure the names are as expected + t1_exp_names <- c("row_label1", "var1_n", "var1_sd", "var2_n", "var2_sd", "ord_layer_index", "ord_layer_1") + expect_equal(names(d1), t1_exp_names) + expect_snapshot_output(d1) + + # Check that cols evaluate properly as well + t2 <- tplyr_table(mtcars, gear, cols=am) %>% + add_layer( + group_desc(vars(wt, drat)) %>% + set_format_strings( + "n" = f_str("xx", n), + "sd" = f_str("xx.x", sd, empty = c(.overall = "BLAH")) + ) %>% + set_stats_as_columns() + ) + + expect_silent(build(t2)) + + d2 <- build(t2) + + t2_exp_names <- c('row_label1', 'var1_n_0', 'var1_sd_0', 'var1_n_1', 'var1_sd_1', 'var2_n_0', + 'var2_sd_0', 'var2_n_1', 'var2_sd_1', 'ord_layer_index', 'ord_layer_1') + + expect_equal(names(d2), t2_exp_names) + expect_snapshot_output(d2) + +}) diff --git a/tests/testthat/test-layer_templates.R b/tests/testthat/test-layer_templates.R new file mode 100644 index 00000000..12bb30a2 --- /dev/null +++ b/tests/testthat/test-layer_templates.R @@ -0,0 +1,235 @@ +op <- options() + +# Template doesn't exist +load(test_path('adsl.Rdata')) + +test_that("Template errors correctly upon creation", { + + # Enforce ellipsis + # Invalid starting place + expect_snapshot_error( + new_layer_template( + "test_template", + set_format_strings() + ) + ) + + # Didn't use ellispsis + expect_snapshot_error( + new_layer_template( + "test_template", + group_count(adsl, TRT01P) + ) + ) + + # Non-Tplyr functions + expect_snapshot_error( + new_layer_template( + "test_template", + group_count(...) %>% + print() + ) + ) + + # template exists warning + expect_warning({ + new_layer_template("test_template", group_count(...)) + new_layer_template("test_template", group_count(...)) + }) + + # This makes sense here - remove_layer_template effectively removes layer templates + expect_silent(remove_layer_template("test_template")) + expect_true(!("test_template" %in% names(getOption('tplyr.layer_templates')))) + expect_warning(remove_layer_template("test_template"), "No template named") +}) + +test_that("Templates create effectively", { + # Basic template + expect_silent( + new_layer_template( + "test_template", + group_count(...) + ) + ) + + expect_true('test_template' %in% names(getOption('tplyr.layer_templates'))) + + remove_layer_template("test_template") + + # Templates identify additional params + new_layer_template( + "test_template", + group_count(...) 
%>% + set_order_count_method({sort_meth}) %>% + set_ordering_cols({sort_col}) + ) + + expect_equal(attr(get_layer_template('test_template'), 'params'), c("sort_meth", "sort_col")) + + remove_layer_template("test_template") +}) + +test_that("Template errors correctly upon execution", { + new_layer_template( + "test1", + group_count(...) %>% + set_format_strings(f_str("xx (xx.x%)", n, pct)) + ) + + new_layer_template( + "test2", + group_count(...) %>% + set_format_strings(f_str("xx (xx.x%)", n, pct)) %>% + set_order_count_method({sort_meth}) %>% + set_ordering_cols({sort_col}) + ) + + expect_snapshot_error( + tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template('bad', RACE) + ) + ) + + # Args aren't in list + expect_snapshot_error( + tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template('test2', RACE, add_params = "bad") + ) + ) + + expect_snapshot_error( + tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template('test2', RACE, add_params = vars(USUBJID)) + ) + ) + + # Args must be named + expect_snapshot_error( + tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template('test2', RACE, add_params = list("bycount")) + ) + ) + + # Invalid template + options('tplyr.layer_templates' = append(getOption('tplyr.layer_templates'), list(bad = "bad"))) + + # Args must be named + expect_snapshot_error( + tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template('bad', RACE) + ) + ) + + remove_layer_template("bad") + + # Param mismatches + # Extra param in call + expect_snapshot_error( + tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template('test2', RACE, add_params = list( + sort_meth = "bycount", + sort_col = Placebo, + test = vars(a, b, c) + )) + ) + ) + # Missing param in call + expect_snapshot_error( + tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template('test2', RACE, add_params = list( + sort_meth = "bycount" + )) + ) + ) + + remove_layer_template("test1") + remove_layer_template("test2") +}) + +test_that("Templates can be used correctly", { + new_layer_template( + "test1", + group_count(...) %>% + set_format_strings(f_str("xx (xx.x%)", n, pct)) + ) + + new_layer_template( + "test2", + group_count(...) 
%>% + set_format_strings(f_str("xx (xx.x%)", n, pct)) %>% + set_order_count_method({sort_meth}) %>% + set_ordering_cols({sort_col}) + ) + + # NULL params on template without params + t1 <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("xx (xx.x%)", n, pct)) + ) %>% + build() + + t2 <- tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template("test1", RACE) + ) %>% + build() + + expect_equal(t1, t2) + + # Params passed through - this tests both quoted and non-quoted args + t3 <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE, by=ETHNIC) %>% + set_format_strings(f_str("xx (xx.x%)", n, pct)) %>% + set_order_count_method("bycount") %>% + set_ordering_cols(`Xanomeline Low Dose`) + ) %>% + build() + + t4 <- tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template("test2", RACE, by=ETHNIC, add_params = + list( + sort_meth = "bycount", + sort_col = `Xanomeline Low Dose` + )) + ) %>% + build() + + expect_equal(t3, t4) +}) + +test_that("Templates are extensible", { + + t1 <- tplyr_table(adsl, TRT01P) %>% + add_layer( + group_count(RACE) %>% + set_format_strings(f_str("xx (xx.x%)", n, pct)) %>% + add_total_row() + ) %>% + build() + + t2 <- tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template("test1", RACE) %>% + add_total_row() + ) %>% + build() + + expect_equal(t1, t2) + +}) + +test_that("Templates print appropriately", { + expect_snapshot_output(get_layer_templates()) +}) + +options(op) diff --git a/tests/testthat/test-meta.R b/tests/testthat/test-meta.R new file mode 100644 index 00000000..3dbc6e68 --- /dev/null +++ b/tests/testthat/test-meta.R @@ -0,0 +1,356 @@ +load(test_path('adae.Rdata')) +load(test_path('adsl.Rdata')) +load(test_path('adlb.Rdata')) + +adae <- adae %>% + filter(AEBODSYS %in% c("NERVOUS SYSTEM DISORDERS", "SKIN AND SUBCUTANEOUS TISSUE DISORDERS", + "PSYCHIATRIC DISORDERS" )) + +# Alter some reference indicators for shift +adlb[c(5, 10, 15, 20, 25, 30), 'ANRIND'] <- "H" +adlb[c(5, 10, 15, 20, 25, 30), 'BNRIND'] <- "L" + +# Insert a missing value +adsl$ETHNIC[1] <- NA_character_ + +# Define a function to flip factors to characters +fct2chr <- function(.data) { + .data %>% + mutate( + across(where(is.factor), ~as.character(.x)) + ) +} + +# Table to test out totals, missings, table where, cols, by, unnested +# basic counts, and descriptive stats +t1 <- tplyr_table(adsl, TRT01A, where = SAFFL == "Y", cols=SEX) %>% + add_treat_grps( + Treated = c("Xanomeline High Dose", "Xanomeline Low Dose") + ) %>% + # Create a total group column + add_total_group() %>% + # Add a count layer for SEX + add_layer( + group_count(ETHNIC, by = RACE) %>% + set_denoms_by(TRT01A) %>% + # Make a total row + add_total_row(fmt=f_str("xx",n), count_missings=FALSE, sort_value=-Inf) %>% + # Change the total row label + set_total_row_label("n") %>% + # Add a missing count row, which is made up of any NA values + set_missing_count(f_str("xx", n), denom_ignore=TRUE, Missing = NA, Empty = "Blah") + ) %>% + # Add a descriptive statistics layer for AGE + add_layer( + group_desc(AGE, by = RACE) + ) + +dat1 <- t1 %>% + build(metadata=TRUE) + +# Table to test out character unnested, and nested counts, layer where +t2 <- tplyr_table(adae, TRTA) %>% + add_layer( + group_count("Text label", where = AESEV == "MODERATE") %>% + add_risk_diff( + c("Xanomeline High Dose", "Placebo") + ) + ) %>% + add_layer( + group_count(vars(AEBODSYS, AEDECOD)) + ) + +dat2 <- suppressWarnings(t2 %>% build(metadata=TRUE)) + +# Table to test out character outer for count layers 
+t3 <- tplyr_table(adsl, TRT01A) %>% + add_layer( + group_count(vars("Outer string", RACE)) + ) + +dat3 <- t3 %>% + build(metadata=TRUE) + +# Table for testing of Shift layers +t4 <- tplyr_table(adlb, TRTA, where = AVISIT != "") %>% + add_layer( + group_shift(vars(row = BNRIND, column=ANRIND), by=AVISIT) + ) + +dat4 <- t4 %>% + build(metadata=TRUE) + +test_that("Metadata creation errors generate properly", { + m <- tplyr_meta() + + # Not providing metadata object + expect_snapshot_error(add_variables(mtcars, quos(a))) + expect_snapshot_error(add_filters(mtcars, quos(a==1))) + + # Didn't provide filter + expect_snapshot_error(tplyr_meta(quos(a), 'x')) + expect_snapshot_error(add_filters(m, 'x')) + + # Didn't provide names + expect_snapshot_error(tplyr_meta('x')) + expect_snapshot_error(add_variables(m, 'x')) + +}) + +test_that("Exported metadata function construct metadata properly", { + m <- tplyr_meta(quos(a, b, c), quos(a==1, b==2, c==3)) + + expect_equal(m$names, quos(a, b, c)) + expect_equal(m$filters, quos(a==1, b==2, c==3)) + + m <- add_variables(m, quos(x)) + m <- add_filters(m, quos(x=="a")) + + expect_equal(m$names, quos(a, b, c, x)) + expect_equal(m$filters, quos(a==1, b==2, c==3, x=="a")) +}) + +test_that("Descriptive Statistics metadata backend assembles correctly", { + + # Standard treatment group + m1 <- get_meta_subset(t1, 'd7_2', 'var1_Placebo_M') + m1_comp <- t1$built_target %>% + filter( + RACE == "BLACK OR AFRICAN AMERICAN", + SEX == "M", + SAFFL == "Y", + TRT01A == "Placebo" + ) %>% + select(USUBJID, TRT01A, RACE, SEX, SAFFL, AGE) %>% + fct2chr() + + expect_equal(m1, m1_comp, ignore_attr=TRUE) + + # Total group + m2 <- get_meta_subset(t1, 'd7_2', 'var1_Total_F') + m2_comp <- t1$built_target %>% + filter( + RACE == "BLACK OR AFRICAN AMERICAN", + SEX == "F", + SAFFL == "Y", + TRT01A %in% c("Placebo", "Xanomeline High Dose", "Xanomeline Low Dose") + ) %>% + select(USUBJID, TRT01A, RACE, SEX, SAFFL, AGE) %>% + fct2chr() + + expect_equal(m2, m2_comp, ignore_attr=TRUE) + + # Treated group + m3 <- get_meta_subset(t1, 'd7_2', 'var1_Treated_F') + m3_comp <- t1$built_target %>% + filter( + RACE == "BLACK OR AFRICAN AMERICAN", + SEX == "F", + SAFFL == "Y", + TRT01A %in% c("Xanomeline High Dose", "Xanomeline Low Dose") + ) %>% + select(USUBJID, TRT01A, RACE, SEX, SAFFL, AGE) %>% + fct2chr() + + expect_equal(m3, m3_comp, ignore_attr=TRUE) +}) + +test_that("Count Layer metadata backend assembles correctly", { + + # Here use demographics t1 + # Standard treatment, normal row count + m1 <- get_meta_subset(t1, 'c6_1', 'var1_Placebo_M') + m1_comp <- t1$built_target %>% + filter( + RACE == "BLACK OR AFRICAN AMERICAN", + SEX == "M", + SAFFL == "Y", + TRT01A == "Placebo", + ETHNIC == "NOT HISPANIC OR LATINO" + ) %>% + select(USUBJID, TRT01A, RACE, SEX, SAFFL, ETHNIC) %>% + fct2chr() + + expect_equal(m1, m1_comp, ignore_attr=TRUE) + + # Total group, missing row + m2 <- get_meta_subset(t1, 'c11_1', 'var1_Total_F') + m2_comp <- t1$built_target %>% + filter( + RACE == "WHITE", + SEX == "F", + SAFFL == "Y", + TRT01A %in% c("Placebo", "Xanomeline High Dose", "Xanomeline Low Dose"), + is.na(ETHNIC) + ) %>% + select(USUBJID, TRT01A, RACE, SEX, SAFFL, ETHNIC) %>% + fct2chr() + + expect_equal(m2, m2_comp, ignore_attr=TRUE) + + # Treated group, total row + m3 <- get_meta_subset(t1, 'c13_1', 'var1_Treated_F') + m3_comp <- t1$built_target %>% + filter( + SEX == "F", + SAFFL == "Y", + TRT01A %in% c("Xanomeline High Dose", "Xanomeline Low Dose") + ) %>% + select(USUBJID, TRT01A, SEX, SAFFL, ETHNIC, RACE) 
%>% + fct2chr() + + expect_equal(m3, m3_comp, ignore_attr=TRUE) + + # Now using AE table t2 + # Unnested character target + m4 <- get_meta_subset(t2, "c1_1", "var1_Xanomeline Low Dose") + m4_comp <- t2$built_target %>% + filter( + AESEV == "MODERATE", + TRTA == "Xanomeline Low Dose" + ) %>% + select(USUBJID, TRTA, AESEV) %>% + fct2chr() + + expect_equal(m4, m4_comp, ignore_attr=TRUE) + + # Outer layer + m5 <- get_meta_subset(t2, "c1_2", "var1_Xanomeline High Dose") + m5_comp <- t2$built_target %>% + filter( + TRTA == "Xanomeline High Dose", + AEBODSYS == "NERVOUS SYSTEM DISORDERS" + ) %>% + select(USUBJID, TRTA, AEDECOD, AEBODSYS) %>% + fct2chr() + + expect_equal(m5, m5_comp, ignore_attr=TRUE) + + # Inner layer + m6 <- get_meta_subset(t2, 'c6_2', "var1_Xanomeline Low Dose") + m6_comp <- t2$built_target %>% + filter( + TRTA == "Xanomeline Low Dose", + AEBODSYS == "NERVOUS SYSTEM DISORDERS", + AEDECOD == "DIZZINESS" + ) %>% + select(USUBJID, TRTA, AEBODSYS, AEDECOD) %>% + fct2chr() + + expect_equal(m6, m6_comp, ignore_attr=TRUE) + + # Risk difference + m7 <- get_meta_subset(t2, 'c1_1', 'rdiff_Xanomeline High Dose_Placebo') + m7_comp <- t2$built_target %>% + filter( + AESEV == "MODERATE", + TRTA %in% c("Xanomeline High Dose", "Placebo") + ) %>% + select(USUBJID, TRTA, AESEV) %>% + fct2chr() + + expect_equal(m4, m4_comp, ignore_attr=TRUE) + + + # Character outer string + m8 <- get_meta_subset(t3, 'c1_1', 'var1_Placebo') + m8_comp <- t3$built_target %>% + filter( + TRT01A == "Placebo" + ) %>% + select(USUBJID, TRT01A, RACE) %>% + fct2chr() + + expect_equal(m8, m8_comp, ignore_attr=TRUE) + + m9 <- get_meta_subset(t3, 'c3_1', 'var1_Placebo') + m9_comp <- t3$built_target %>% + filter( + TRT01A == "Placebo", + RACE == "BLACK OR AFRICAN AMERICAN" + ) %>% + select(USUBJID, TRT01A, RACE) %>% + fct2chr() + + expect_equal(m9, m9_comp, ignore_attr=TRUE) + +}) + +test_that("Shift Layer metadata backend assembles correctly", { + m1 <- get_meta_subset(t4, 's3_1', 'var1_Placebo_H') + m1_comp <- t4$built_target %>% + filter( + BNRIND == "L", + ANRIND == "H", + AVISIT == "End of Treatment", + TRTA == "Placebo" + ) %>% + select(USUBJID, TRTA, AVISIT, ANRIND, BNRIND) %>% + fct2chr() + + expect_equal(m1, m1_comp, ignore_attr=TRUE) +}) + +test_that("metadata queried without Tplyr table queries effectively", { + # Pull out the dataframes directly + meta <- t1$metadata + dat <- t1$target + + m1 <- get_meta_subset(meta, 'd7_2', 'var1_Placebo_M', target = dat) + + m1_comp <- t1$built_target %>% + filter( + RACE == "BLACK OR AFRICAN AMERICAN", + SEX == "M", + SAFFL == "Y", + TRT01A == "Placebo", + ETHNIC == "NOT HISPANIC OR LATINO" + ) %>% + select(USUBJID, TRT01A, RACE, SEX, SAFFL, AGE) %>% + fct2chr() + + expect_equal(m1, m1_comp, ignore_attr=TRUE) +}) + +t <- tplyr_table(mtcars, gear) %>% + add_layer( + group_desc(wt) + ) + +test_that("Metadata extraction and extension error properly", { + + expect_snapshot_error(get_metadata(mtcars)) + + expect_snapshot_error(get_metadata(t)) + + dat <- t %>% build(metadata=TRUE) + + m <- tibble( + var1_3 = list(tplyr_meta()) + ) + + expect_snapshot_error(append_metadata(t, m)) + + m['row_id'] <- c("d1_1") + expect_snapshot_error(append_metadata(t, m)) + +}) + +test_that("Metadata extraction and extension work properly", { + + dat <- t %>% build(metadata=TRUE) + + m <- tibble( + row_id = 'x1_1', + var1_3 = list(tplyr_meta()) + ) + + t <- append_metadata(t, m) + expect_snapshot(get_metadata(t)) + +}) + +test_that("Metadata print method is accurate", { + x <- tplyr_meta(quos(a, b, 
c), quos(a==1, b==2, c==3, x=="a")) + expect_snapshot(print(x)) +}) diff --git a/tests/testthat/test-meta_utils.R b/tests/testthat/test-meta_utils.R new file mode 100644 index 00000000..66697c90 --- /dev/null +++ b/tests/testthat/test-meta_utils.R @@ -0,0 +1,31 @@ +t <- tplyr_table(mtcars, cyl) %>% + add_layer( + group_desc(hp) + ) + +dat <- t %>% build(metadata = TRUE) + +m <- t$metadata + +test_that("Metadata extractors error properly", { + # Invalid row ID + expect_snapshot_error(get_meta_result(t, "bad", 'var1_4')) + + # row_id not string + expect_snapshot_error(get_meta_result(t, 2, 'var1_4')) + + # Not a present column + expect_snapshot_error(get_meta_result(t, 'd1_1', "bad")) + + # Not a string + expect_snapshot_error(get_meta_result(t, 'd1_1', 2)) + + # Not a result column + expect_snapshot_error(get_meta_result(t, 'd1_1', 'row_label1')) + + # add_cols not vars + expect_snapshot_error(get_meta_subset(t, 'd1_1', 'var1_4', add_cols = "bad")) + + # get_meta_subset needs target + expect_snapshot_error(get_meta_subset(m, 'd1_1', 'var1_4')) +}) diff --git a/tests/testthat/test-precision.R b/tests/testthat/test-precision.R index bfbeafef..1247a4f8 100644 --- a/tests/testthat/test-precision.R +++ b/tests/testthat/test-precision.R @@ -3,8 +3,8 @@ mtcars_long <- mtcars %>% rownames_to_column(var = "model") %>% pivot_longer(cols = c('mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec')) -Tplyr:::make_prec_data(mtcars_long, quos(name), quo(value), cap=c('int'=99, 'dec'=99)) %>% - arrange(name) +# Tplyr:::make_prec_data(mtcars_long, quos(name), quo(value), cap=c('int'=99, 'dec'=99)) %>% +# arrange(name) test_that('Precision data calculates correctly', { # No by @@ -117,3 +117,137 @@ test_that('Caps work correctly', { }) +test_that("Precision data can be provided externally", { + # Mock up a precision data set + prec <- tibble::tribble( + ~vs, ~max_int, ~max_dec, + 0, 1, 1, + 1, 2, 2 + ) + + t <- tplyr_table(mtcars, gear) + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(prec) + + t <-add_layers(t, l) + + # Proper data builds without error + expect_silent(build(t)) +}) + +test_that("Missing by variables are handled as specified in precision data",{ + + # Mock up a precision data set + prec2 <- tibble::tribble( + ~vs, ~max_int, ~max_dec, + 0, 1, 1 + ) + + expect_snapshot_error({ + t <- tplyr_table(mtcars, gear) + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(prec2) + t <- add_layers(t, l) + build(t) + }) + + expect_snapshot_error({ + t <- tplyr_table(mtcars, gear) + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(prec2, default="error") + t <- add_layers(t, l) + build(t) + }) + + expect_snapshot_error({ + t <- tplyr_table(mtcars, gear) + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(prec2, default="blah") + t <- add_layers(t, l) + build(t) + }) + + expect_snapshot_output({ + t <- tplyr_table(mtcars, gear) + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(prec2, default="auto") + t <- add_layers(t, l) + build(t) + }) + +}) + +test_that("Data validation for external precision data works effectively", { + # Mock up a precision data set + prec <- tibble::tribble( + ~vs, ~max_int, ~max_dec, + 0, 1, 1, + 1, 2, 2 + ) + + # max_int and max_dec must exist + p1 <- select(prec, -max_dec) + p2 <- select(prec, -max_int) + + t <- tplyr_table(mtcars, gear) + + expect_snapshot_error({ + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(p1) + }) + + expect_snapshot_error({ + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(p2) + }) + + # max_int and 
max_dec must be valid integers + p3 <- prec %>% mutate(max_int = max_int + .1) + p4 <- prec %>% mutate(max_dec = max_dec + .1) + + expect_snapshot_error({ + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(p3) + }) + + expect_snapshot_error({ + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(p4) + }) + + # by variable types match + p5 <- prec %>% mutate(vs = as.character(vs)) + + expect_snapshot_error({ + l <- group_desc(t, wt, by = vs) %>% + set_precision_data(p5) + t <- add_layers(t, l) + build(t) + }) +}) + + +test_that("Partially provided decimal precision caps populate correctly", { + + load(test_path('adlb.Rdata')) + + t <- tplyr_table(adlb, TRTA, where = PARAMCD == 'URATE') %>% + add_layer( + group_desc(AVAL) %>% + set_format_strings("Mean (SD)" = f_str("a.a (a.a)", mean, sd), cap = c(dec = 1)) + ) %>% + add_layer( + group_desc(AVAL) %>% + set_format_strings("Mean (SD)" = f_str("a.a (a.a)", mean, sd), cap = c(int = 1)) + ) %>% + add_layer( + group_desc(AVAL) %>% + set_format_strings("Mean (SD)" = f_str("a.a (a.a)", mean, sd), cap = c(int = 1, dec = 1)) + ) + + # In bug #20 this caused an error so expect build to complete correctly + expect_silent(d <- build(t)) + + # Manually verified these results look appropriate + expect_snapshot_output(print(d %>% select(starts_with('var1')))) +}) diff --git a/tests/testthat/test-riskdiff.R b/tests/testthat/test-riskdiff.R index 3c3346e6..644ffec0 100644 --- a/tests/testthat/test-riskdiff.R +++ b/tests/testthat/test-riskdiff.R @@ -264,3 +264,17 @@ test_that("Distinct or non-distinct values are chosen properly", { expect_true(!all(dat2$`rdiff_Xanomeline High Dose_Placebo` == dat3$`rdiff_Xanomeline High Dose_Placebo`)) }) + +test_that("Error generates when duplicating riskdiff comparison values", { + + expect_snapshot_error( + tplyr_table(mtcars, gear) %>% + add_layer( + group_count(cyl) %>% + add_risk_diff( + c("4", "4") + ) + ) + ) + +}) diff --git a/tests/testthat/test-shift.R b/tests/testthat/test-shift.R index 789c81e1..7d31bc02 100644 --- a/tests/testthat/test-shift.R +++ b/tests/testthat/test-shift.R @@ -67,11 +67,6 @@ test_that("group_shift outputs the expected numeric data", { }) test_that("group_shift outputs the expected formatted data", { - expect_equal(dim(s1$formatted_data), c(3, 11)) - expect_equal(dim(s2$formatted_data), c(3, 11)) - expect_equal(dim(s3$formatted_data), c(3, 11)) - - expect_equal(t4$layers[[1]]$formatted_data$row_label1, c("6", "8", "4")) }) diff --git a/tests/testthat/test-sort.R b/tests/testthat/test-sort.R index 1916854f..c77458dd 100644 --- a/tests/testthat/test-sort.R +++ b/tests/testthat/test-sort.R @@ -145,7 +145,7 @@ test_that("A group_desc layer can be ordered properly", { }) ##### Nested -adsl <- haven::read_xpt("adsl.xpt") +load(test_path('adsl.Rdata')) adsl$EOSSTTN <- unclass(as.factor(adsl$EOSSTT)) + 100 adsl$DCDECODN <- unclass(as.factor(adsl$DCDECOD)) + 100 adsl1 <- tplyr_table(adsl, TRT01A, cols = AGEGR1) %>% diff --git a/tests/testthat/test-str_indent_wrap.R b/tests/testthat/test-str_indent_wrap.R new file mode 100644 index 00000000..f96d2d61 --- /dev/null +++ b/tests/testthat/test-str_indent_wrap.R @@ -0,0 +1,24 @@ +test_that("str_indent_wrap errors properly", { + expect_error(str_indent_wrap(1), regexp = 'x must be a character vector') +}) + +test_that("str_indent_wrap wraps text properly", { + text1 <- c("RENAL AND URINARY DISORDERS", " NEPHROLITHIASIS") + text2 <- c("RENAL AND URINARY DISORDERS", "\tNEPHROLITHIASIS") + text3 <- c("RENAL AND URINARY DISORDERS", 
"\t\tNEPHROLITHIASIS") + + expect_equal( + str_indent_wrap(text1, width=8), + c("RENAL\nAND\nURINARY\nDISORDE-\nRS", " NEPHROL-\n ITHIASI-\n S") + ) + + expect_equal( + str_indent_wrap(text2, width=9, tab_width=4), + c("RENAL AND\nURINARY\nDISORDER-\nS"," NEPHROLI-\n THIASIS") + ) + + expect_equal( + str_indent_wrap(text3, width=9, tab_width=2), + c("RENAL AND\nURINARY\nDISORDER-\nS"," NEPHROLI-\n THIASIS") + ) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 79a97d53..3910f81a 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,66 +1,56 @@ -## modify_nested_call ---- -test_that("Call must be quoted", { - expect_snapshot_error(Tplyr:::modify_nested_call(mean(c(1,2,3)))) - c <- quo(tplyr_table(treat_var = Species)) - expect_silent(Tplyr:::modify_nested_call(c)) -}) - -test_that("With no additional parameters, a call returns unchanged", { - c <- quo(tplyr_table(treat_var = Species)) - r <- Tplyr:::modify_nested_call(c) - expect_equal(c, r) -}) - -test_that("By default, only `Tplyr` exported functions are allowed", { - # Non-tplyr function - c <- quo(mean(c(1,2,3))) - expect_snapshot_error(Tplyr:::modify_nested_call(c)) - - # Non-exported Tplyr function - # c <- quo(Tplyr:::modify_nested_call(quo(x %>% y))) - # expect_silent(Tplyr:::modify_nested_call(c)) - - # Exported Tplyr function - c <- quo(tplyr_table(treat_var = Species)) - expect_silent(Tplyr:::modify_nested_call(c)) -}) +## Make sure calls to add_layer all work correctly ---- +test_that("Layers are attached correctly in add_layers", { -test_that("Setting `allowable_calls` to null allows calls from any namespace", { - # Non-tplyr function - c <- quo(mean(c(1,2,3))) - expect_silent(Tplyr:::modify_nested_call(c, allowable_calls = NULL)) + # Basic call + t1 <- tplyr_table(mtcars, cyl) %>% + add_layer( + group_desc(mpg) + ) - # Non-exported Tplyr function - c <- quo(Tplyr:::modify_nested_call(quo(x %>% y))) - expect_silent(Tplyr:::modify_nested_call(c, allowable_calls = NULL)) - # Exported Tplyr function - c <- quo(var() %>% tplyr_table(treat_var = Species) %>% print()) - expect_silent(Tplyr:::modify_nested_call(c, allowable_calls = NULL)) -}) + # With piping + t2 <- tplyr_table(mtcars, cyl) %>% + add_layer( + group_desc(mpg) %>% + set_format_strings('Mean (SD)' = f_str('xxx (xxx)', mean, sd)) + ) -test_that("Calls are modified and evaluate - no piping", { - c <- quo(mean(c(1,2,3, NA))) - r <- Tplyr:::modify_nested_call(c, na.rm=TRUE, allowable_calls = NULL) - expect_true(is.na(eval(quo_get_expr(c)))) - expect_equal(eval(quo_get_expr(r)), 2) -}) -test_that("Calls are modified and evaluate - piping", { - c <- quo(mean(c(1,2,3, NA)) %>% print()) - r <- Tplyr:::modify_nested_call(c, na.rm=TRUE, allowable_calls = NULL) + # Native piping + t3 <- tplyr_table(mtcars, cyl) %>% + add_layer( + set_format_strings(group_desc(mpg), 'Mean (SD)' = f_str('xxx (xxx)', mean, sd)) + ) - expect_output(eval(quo_get_expr(c)), "NA") - expect_output(eval(quo_get_expr(r)), "2") + expect_identical(parent.env(t1$layers[[1]]), t1) + expect_identical(parent.env(t2$layers[[1]]), t2) + expect_identical(parent.env(t3$layers[[1]]), t3) }) -test_that("Multiple pipes are processed appropriately", { - c <- quo(c(1,2,3) %>% mean() %>% print()) - r <- Tplyr:::modify_nested_call(c, a = NA, allowable_calls = NULL) - - expect_output(eval(quo_get_expr(c)), "2") - expect_output(eval(quo_get_expr(r)), "NA") +test_that("add_layer only accepts Tplyr functions", { + expect_snapshot_error( + tplyr_table(mtcars, cyl) %>% + 
add_layer( + mean(c(1,2,3)) + ) + ) + + expect_snapshot_error( + tplyr_table(mtcars, cyl) %>% + add_layer( + group_desc(mpg) %>% + set_format_strings('Mean (SD)' = f_str('xxx (xxx)', mean, sd)) %>% + mean() + ) + ) + + expect_snapshot_error( + tplyr_table(mtcars, cyl) %>% + add_layer( + set_format_strings(mean(group_desc(mpg)), 'Mean (SD)' = f_str('xxx (xxx)', mean, sd)) + ) + ) }) ## apply_row_masks tests ---- diff --git a/vignettes/adas.Rdata b/vignettes/adas.Rdata new file mode 100644 index 00000000..b46e2bb1 Binary files /dev/null and b/vignettes/adas.Rdata differ diff --git a/vignettes/custom-metadata.Rmd b/vignettes/custom-metadata.Rmd new file mode 100644 index 00000000..3b1830cd --- /dev/null +++ b/vignettes/custom-metadata.Rmd @@ -0,0 +1,206 @@ +--- +title: "Creating Custom Tplyr Metadata" +output: + rmarkdown::html_vignette: + toc: true +vignette: > + %\VignetteIndexEntry{custom-metadata} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup, include=FALSE} +library(dplyr) +library(tidyr) +library(magrittr) +library(Tplyr) +library(knitr) +``` + +```{r data prep, echo=FALSE} +load("adas.Rdata") +load("adsl.Rdata") + +t <- tplyr_table(adas, TRTP, where=EFFFL == "Y" & ITTFL == "Y" & PARAMCD == "ACTOT" & ANL01FL == "Y") %>% + set_pop_data(adsl) %>% + set_pop_treat_var(TRT01P) %>% + set_pop_where(EFFFL == "Y" & ITTFL == "Y") %>% + set_distinct_by(USUBJID) %>% + set_desc_layer_formats( + 'n' = f_str('xx', n), + 'Mean (SD)' = f_str('xx.x (xx.xx)', mean, sd), + 'Median (Range)' = f_str('xx.x (xxx;xx)', median, min, max) + ) %>% + add_layer( + group_desc(AVAL, where= AVISITN == 0, by = "Baseline") + ) %>% + add_layer( + group_desc(AVAL, where= AVISITN == 24, by = "Week 24") + ) %>% + add_layer( + group_desc(CHG, where= AVISITN == 24, by = "Change from Baseline") + ) + +sum_data <- t %>% + build(metadata=TRUE) %>% + apply_row_masks(row_breaks = TRUE) %>% + select(row_id, starts_with('row_label'), + var1_Placebo, `var1_Xanomeline Low Dose`, `var1_Xanomeline High Dose`) + +# I don't need the full model code for this example so just mock it up. +# But if you want to see it, it's available here: +# https://github.com/RConsortium/submissions-pilot1/blob/694a207aca7e419513ffe16f6f5873526da1bdcb/R/eff_models.R#L17 +model_portion <- tibble::tribble( + ~"row_id", ~"row_label1", ~"var1_Xanomeline Low Dose", ~"var1_Xanomeline High Dose", + "x4_1", "p-value(Dose Response) [1][2]", "", "0.245", + "x4_2", "", "", "", + "x4_3", "p-value(Xan - Placebo) [1][3]", "0.569", "0.233", + "x4_4", " Diff of LS Means (SE)", "-0.5 (0.82)", "-1.0 (0.84)", + "x4_5", " 95% CI", "(-2.1;1.1)", "(-2.7;0.7)", + "x4_6", "", "", "", + "x4_7", "p-value(Xan High - Xan Low) [1][3]", "", "0.520", + "x4_8", " Diff of LS Means (SE)", "", "-0.5 (0.84)", + "x4_9", " 95% CI", "", "(-2.2;1.1)" +) + +full_data <- bind_rows(sum_data, model_portion) %>% + mutate( + across(where(is.character), ~ replace_na(., "")) + ) + + + +``` + +As covered in `vignette('metadata')`, Tplyr can produce metadata for any result that it calculates. But what about data that Tplyr can't produce, such as a efficacy results or some sort of custom analysis? You may still want that drill down capability either on your own or paired with an existing Tplyr table. + +Take for instance Table 14-3.01 from the [CDISC Pilot](https://github.com/atorus-research/CDISC_pilot_replication). 
Skipping the actual construction of the table, here's the output data from Tplyr and some manual calculation: + +```{r view data} +kable(full_data) +``` + +This is the primary efficacy table from the trial. The top portion of this table is fairly straightforward with Tplyr and can be done using descriptive statistic layers. Once you hit the p-values on the lower half, this goes beyond Tplyr's remit. To produce the table, you can combine Tplyr output with a separate data frame analyzed and formatted yourself (but note you can still use some help from Tplyr tools like `apply_formats()`). + +But what about the metadata? How do you get the drill down capabilities for that lower half of the table? We've provided a couple additional tools in Tplyr to allow you to construct your own metadata and append it to the existing metadata present in a Tplyr table. + +## Build a `tplyr_meta` object + +As covered in `vignette('metadata')`, a `tplyr_meta` object consists of two different fields: a list of variable names, and a list of filter conditions. You provide both of these fields as a list of quosures: + +```{r tplyr_meta} +m <- tplyr_meta( + names = quos(a, b, c), + filters = quos(a==1, b==2, c==3) +) +m +``` + +The `tplyr_meta()` function can take these fields immediately upon creation. If you need to dynamically create a `tplyr_meta` object (such as how Tplyr constructs the objects internally), the functions `add_variables()` and `add_filters()` are available to extend an existing `tplyr_meta` object: + +```{r extending tplyr_meta} +m <- m %>% + add_variables(quos(x)) %>% + add_filters(quos(x == 'a')) + +m +``` + +## Building your own metadata table + +Now that we can create our own `tplyr_meta` objects, let's assemble the metadata for the bottom portion of Table 14-3.01: + +```{r build efficacy metadata} +# Overall model subset of data +meta <- tplyr_meta( + names = quos(TRTP, EFFFL, ITTFL, ANL01FL, SITEGR1, AVISIT, AVISITN, PARAMCD, AVAL, BASE, CHG), + filters = quos(EFFFL == "Y", ITTFL == "Y", PARAMCD == "ACTOT", ANL01FL == "Y", AVISITN == 24) +) + +# Xan High / Placebo contrast +meta_xhp <- meta %>% + add_filters(quos(TRTP %in% c("Xanomeline High Dose", "Placebo"))) + +# Xan Low / Placebo Contrast +meta_xlp <- meta %>% + add_filters(quos(TRTP %in% c("Xanomeline Low Dose", "Placebo"))) + +# Xan High / Xan Low Contrast +meta_xlh <- meta %>% + add_filters(quos(TRTP %in% c("Xanomeline High Dose", "Xanomeline Low Dose"))) + +eff_meta <- tibble::tribble( + ~"row_id", ~"row_label1", ~"var1_Xanomeline Low Dose", ~"var1_Xanomeline High Dose", + "x4_1", "p-value(Dose Response) [1][2]", NULL, meta, + "x4_3", "p-value(Xan - Placebo) [1][3]", meta_xlp, meta_xhp, + "x4_4", " Diff of LS Means (SE)", meta_xlp, meta_xhp, + "x4_5", " 95% CI", meta_xlp, meta_xhp, + "x4_7", "p-value(Xan High - Xan Low) [1][3]", NULL, meta_xlh, + "x4_8", " Diff of LS Means (SE)", NULL, meta_xlh, + "x4_9", " 95% CI", NULL, meta_xlh +) +``` + +Let's break down what happened here: + +- First, we assemble the overarching metadata object for the model. A lot of this metadata is shared across each of the different result cells for all of the efficacy data, so we can start by collecting this information into a `tplyr_meta` object. +- Next, we can use that starting point to build `tplyr_meta` objects for the other result cells. The model data contains contrasts of each of the different treatment group comparisons.
By using `add_filters()`, we can create those additional three `tplyr_meta` objects using the starting point and attaching an additional filter condition. + - Lastly, to extend the metadata in the original `tplyr_table` object that created the summary portion of this table, we need a data frame. There are a lot of ways to do this, but I like the display and explicitness of `tibble::tribble()`. + +When building a data frame for use with `tplyr_table` metadata, there are really only two rules: + +- You need a column in the data frame called `row_id` +- The `row_id` values cannot be duplicates of any other value within the existing metadata. + +The `row_id` values built by Tplyr will always follow the format "n_n", prefixed with the first letter of the layer type - either "c", "d", or "s". The next number is the layer number (i.e. the order in which the layer was inserted to the Tplyr table), and then finally the row of that layer within the output. For example, the third row of a count layer that was the second layer in the table would have a `row_id` of "c2_3". In this example, I chose "x4_n" as the format, with the "x" symbolizing custom, and these data can be thought of as the fourth layer. That said, these values would typically be masked from the viewer of the table, so they really just need to be unique - you can choose whatever you want. + +## Appending Existing Tplyr Metadata + +Now that we've created our custom extension of the Tplyr metadata, let's extend the existing data frame. To do this, Tplyr has the function `append_metadata()`: + +```{r extending metadata} +t <- append_metadata(t, eff_meta) +``` + +Behind the scenes, this function simply binds the new metadata with the old in the proper section of the `tplyr_table` object. You can view the `tplyr_table` metadata with the function `get_metadata()`: + +```{r get_metadata} +get_metadata(t) +``` + +Finally, as with the automatically created metadata from Tplyr, we can query these result cells just the same: + +```{r query custom metadata} +get_meta_subset(t, 'x4_1', "var1_Xanomeline High Dose") %>% + head() %>% + kable() +``` + +## Metadata Without Tplyr + +You very well may have a scenario where you want to use these metadata functions outside of Tplyr in general. As such, there are S3 methods available to query metadata from a dataframe instead of a Tplyr table, and parameters to provide your own target data frame: + +```{r metadata without Tplyr} +get_meta_subset(eff_meta, 'x4_1', "var1_Xanomeline High Dose", target=adas) %>% + head() %>% + kable() +``` + +As with the Tplyr metadata, the only strict criterion here is that your custom metadata have a `row_id` column. + +## Tying it Together + +The vignette wouldn't be complete without the final contextual example - so here we go. Ultimately these pieces can all fit together in the context of a Shiny application and give you the desired click-through experience. + +```{r, out.width=850, out.extra='style="border: 1px solid #464646;" allowfullscreen="" allow="autoplay"', echo=FALSE} +knitr::include_app("http://michael-stackhouse.shinyapps.io/Tplyr-efficacy-shiny-demo", height = "900px") +``` + +_Source code available [here](https://github.com/atorus-research/Tplyr-efficacy-shiny-demo)_
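For a rough sense of how the app-side lookup works in a setup like the one above, a minimal sketch is below. The helper name and the click-handling assumptions are illustrative only - whatever UI layer is used (reactable, DT, or otherwise) just needs to hand back a `row_id` and a result column name, and `get_meta_subset()` does the rest.

```r
library(Tplyr)

# Hypothetical helper: given the row_id and column picked up from a click
# event, return the source records behind that cell. Only result columns
# (var*, rdiff*) carry metadata, so clicks elsewhere are ignored.
fetch_drill_down <- function(tab, row_id, column) {
  if (!grepl("^(var|rdiff)", column)) {
    return(NULL)
  }
  get_meta_subset(tab, row_id, column)
}

# e.g. fetch_drill_down(t, "x4_3", "var1_Xanomeline High Dose")
```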
diff --git a/vignettes/denom.Rmd b/vignettes/denom.Rmd index bc9f045d..dc6dfcf2 100644 --- a/vignettes/denom.Rmd +++ b/vignettes/denom.Rmd @@ -285,4 +285,6 @@ tplyr_table(adsl2, TRT01P) %>% Now the table is more intuitive. We used `set_missing_count()` to update our denominators, so missing have been excluded. Now, the total row intuitively matches the denominators used within each group, and we can see how many missing records were excluded. +_You may have stumbled upon this portion of the vignette while searching for how to create a total column. Tplyr allows you to do this as well with the function `add_total_group()`; read more in `vignette("table")`._ + And that's it for denominators! Happy counting! diff --git a/vignettes/desc.Rmd b/vignettes/desc.Rmd index 9d78a0aa..d45c7e5e 100644 --- a/vignettes/desc.Rmd +++ b/vignettes/desc.Rmd @@ -42,7 +42,6 @@ tplyr_table(adsl, TRT01P) %>% ) %>% build() %>% kable() - ``` Let's walk through this call to `set_format_strings` to understand in detail what's going on: @@ -151,7 +150,6 @@ tplyr_table(adsl, TRT01P) %>% build() %>% select(-starts_with("ord")) %>% kable() - ``` Here, a few important things are demonstrated: @@ -200,7 +198,12 @@ A lot of the nuance to formatting descriptive statistics layers has already been By default, if there is no available value for a summary in a particular observation, the result being presented will be blanked out. +_Note: Tplyr generally respects factor levels - so in instances of a missing row or column group, if the factor level is present, then the variable or row will still generate._ + ```{r missing} +adsl$TRT01P <- as.factor(adsl$TRT01P) +adlb$TRTA <- as.factor(adlb$TRTA) + adlb_2 <- adlb %>% filter(TRTA != "Placebo") @@ -215,15 +218,11 @@ tplyr_table(adlb_2, TRTA) %>% head() %>% select(-starts_with("ord")) %>% kable() - ``` -Note how the entire example above has all records in `var1_Placebo` missing. 'Tplyr' gives you control over how you fill this space. Let's say that we wanted instead to make that space say "Missing". You can control this with the `f_str()` object using the `empty` parameter +Note how the entire example above has all records in `var1_Placebo` missing. 'Tplyr' gives you control over how you fill this space. Let's say that we wanted instead to make that space say "Missing". You can control this with the `f_str()` object using the `empty` parameter. ```{r missing1} -adlb_2 <- adlb %>% - filter(TRTA != "Placebo") - tplyr_table(adlb_2, TRTA) %>% set_pop_data(adsl) %>% set_pop_treat_var(TRT01P) %>% @@ -240,9 +239,6 @@ tplyr_table(adlb_2, TRTA) %>% Look at the `empty` parameter above. Here, we use a named character vector, where the name is `.overall`. When this name is used, if all elements within the cell are missing, they will be filled with the specified text. Otherwise, the provided string will fill just the missing parameter. In some cases, this may not be what you'd like to see. Perhaps we want a string that fills each missing space. ```{r missing2} -adlb_2 <- adlb %>% - filter(TRTA != "Placebo") - tplyr_table(adlb_2, TRTA) %>% set_pop_data(adsl) %>% set_pop_treat_var(TRT01P) %>% @@ -265,7 +261,6 @@ You may have noticed that the approach to formatting covered so far leaves a lot 'Tplyr' has this covered using auto-precision. Auto-precision allows you to format your numeric summaries based on the precision of the data collected. This has all been built into the format strings, because a natural place to specify your desired format is where you specify how you want your data presented. If you wish to use auto-precision, use `a` instead of `x` when creating your summaries. Note that only one `a` is needed on each side of a decimal.
To use increased precision, use `a+n` where `n` is the number of additional spaces you wish to add. ```{r autoprecision1} - tplyr_table(adlb, TRTA) %>% add_layer( group_desc(AVAL, by = PARAMCD) %>% @@ -275,8 +270,8 @@ tplyr_table(adlb, TRTA) %>% ) %>% build() %>% head(20) %>% + select(-starts_with("ord")) %>% kable() - ``` As you can see, the decimal precision is now varying depending on the test being performed. Notice that both the integer and the decimal side of each number fluctuate as well. `Tplyr` collects both the integer and decimal precision, and you can specify both separately. For example, you could use `x`'s to specify a default number of spaces for your integers that are used consistently across by variables, but vary the decimal precision based on collected data. You can also increment the number of spaces for both integer and decimal separately. @@ -294,8 +289,8 @@ tplyr_table(adlb, TRTA) %>% ) %>% build() %>% head(20) %>% + select(-starts_with("ord")) %>% kable() - ``` Now that looks better. The `cap` argument is part of `set_format_strings()`. You need to specify the integer and decimal caps separately. Note that integer precision works slightly differently than decimal precision. Integer precision relates to the length allotted for the left side of a decimal, but integers will not truncate. When using 'x' formatting, if an integer exceeds the set length, it will push the number over. If the integer side of auto-precision is not capped, the necessary length for an integer in the associated by group will be as long as necessary. Decimals, on the other hand, round to the specified length. These caps apply to the length allotted for the "a" on either the integer or the decimal. So for example, if the decimal length is capped at 2 and the selected precision is "a+1", then 3 decimal places will be allotted. @@ -315,8 +310,69 @@ tplyr_table(adlb, TRTA) %>% ) %>% build() %>% head() %>% + select(-starts_with("ord")) %>% + kable() +``` + +Three variables are being summarized here - AVAL, CHG, and BASE. So which should be used for precision? `set_precision_on()` allows you to specify this, where the `precision_on()` variable must be one of the variables within `target_var`. Similarly, `set_precision_by()` changes the `by` variables used to determine collected precision. If no `precision_on()` variable is specified, the first variable in `target_var` is used. If no `precision_by` variables are specified, then the default `by` variables are used. + +### External Precision + +Lastly, while dynamic precision might be what you're looking for, you may not want precision driven by the data. Perhaps there's a company standard that dictates what decimal precision should be used for each separate lab test. Maybe even deeper down to the lab test and category. New in Tplyr 1.0.0 we've added the ability to take decimal precision from an external source. + +The principle of external precision is exactly the same as auto-precision. The only difference is that you - the user - provide the precision table that Tplyr was automatically calculating in the background. This is done using the new function `set_precision_data()`. In the output below, notice how the precision by PARAMCD varies depending on what was specified in the data frame `prec_data`.
+ + +```{r external-precision} +prec_data <- tibble::tribble( + ~PARAMCD, ~max_int, ~max_dec, + "BUN", 1, 0, + "CA", 2, 4, + "CK", 3, 1, + "GGT", 3, 0, + "URATE", 3, 1, +) + +tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(AVAL, by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA") + ) %>% + set_precision_on(AVAL) %>% + set_precision_by(PARAMCD) %>% + set_precision_data(prec_data) + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% kable() ``` -Three variables are being summarized here - AVAL, CHG, and BASE. So which should be used for precision? `set_precision_on()` allows you to specify this, where the `precision_on()` variable must be one of the variables within `target_var`. Similarly, `set_precision_by()` changes the `by` variables used to determine collected precision. If no `precision_on()` variable is specified, the first variable in `target_var` is used. If no `precision_by()` variables are specified, then the default `by` variables are used. +If one of your by variable groups is missing in the precision data, Tplyr can default back to using auto-precision by using the option `default="auto"`. + +```{r external-precision2} +prec_data <- tibble::tribble( + ~PARAMCD, ~max_int, ~max_dec, + "BUN", 1, 0, + "CA", 2, 4, + "CK", 3, 1, + "GGT", 3, 0, +) + +tplyr_table(adlb, TRTA) %>% + add_layer( + group_desc(AVAL, by = PARAMCD) %>% + set_format_strings( + 'Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty="NA") + ) %>% + set_precision_on(AVAL) %>% + set_precision_by(PARAMCD) %>% + set_precision_data(prec_data, default="auto") + ) %>% + build() %>% + head() %>% + select(-starts_with("ord")) %>% + kable() +```
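Nothing requires the precision data to be typed inline, either - `set_precision_data()` just takes a data frame, so an existing standards file can feed it directly. A small sketch, assuming a hypothetical CSV with `PARAMCD`, `max_int`, and `max_dec` columns:

```r
library(Tplyr)
library(magrittr)

# Hypothetical standards file - the path and its contents are illustrative only
prec_standards <- readr::read_csv("lab_precision_standards.csv", col_types = "cii")

tplyr_table(adlb, TRTA) %>%
  add_layer(
    group_desc(AVAL, by = PARAMCD) %>%
      set_format_strings('Mean (SD)' = f_str('a.a+1 (a.a+2)', mean, sd, empty = "NA")) %>%
      set_precision_by(PARAMCD) %>%
      # Fall back to auto-precision for any PARAMCD missing from the file
      set_precision_data(prec_standards, default = "auto")
  ) %>%
  build()
```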
diff --git a/vignettes/layer_templates.Rmd b/vignettes/layer_templates.Rmd new file mode 100644 index 00000000..15952a96 --- /dev/null +++ b/vignettes/layer_templates.Rmd @@ -0,0 +1,121 @@ +--- +title: "Layer Templates" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{layer_templates} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" ) +``` + +```{r setup, echo=FALSE} +library(Tplyr) +library(knitr) +load('adsl.Rdata') +``` + +There are several scenarios where a layer template may be useful. Some tables, like demographics tables, may have many layers that will all essentially look the same. Categorical variables will have the same count layer settings, and continuous variables will have the same desc layer settings. A template allows a user to build those settings once per layer, then reference the template when the Tplyr table is actually built. Another scenario might be building a set of company layer templates that are built for standard tables to reduce the footprint of code across analyses. In either of these cases, the idea is to reduce the amount of redundant code necessary to create a table. + +Tplyr already has a couple of mechanisms to reduce redundant application of formats. For example, `vignette('tplyr_options')` shows how the options `tplyr.count_layer_default_formats`, `tplyr.desc_layer_default_formats`, and `tplyr.shift_layer_default_formats` can be used to create default format string settings. Additionally, you can set formats table wide using `set_count_layer_formats()`, `set_desc_layer_formats()`, or `set_shift_layer_formats()`. But what these functions and options _don't_ allow you to do is pre-set and reuse the settings for an entire layer, so all of the additional potential layer modifying functions are ignored. This is where layer templates come in. + +# Basic Templates + +The functions `new_layer_template()` and `use_template()` allow a user to create and use layer templates. Layer templates allow a user to pre-build and reuse an entire layer configuration, from the layer constructor down to all modifying functions. Furthermore, users can specify parameters they may want to be interchangeable. Additionally, layer templates are extensible, so a template can be used and then further extended with additional layer modifying functions. + +Consider the following example: + +```{r creating a template} +new_layer_template( + "example_template", + group_count(...) %>% + set_format_strings(f_str("xx (xx%)", n, pct)) +) +``` + +In this example, we've created a basic layer template. The template is named "example_template", and this is the name we'll use to reference the template when we want to use it. When the template is created, we start with the function `group_count(...)`. Note the use of the ellipsis (i.e. `...`). This is a required part of a layer template. Templates must start with a Tplyr layer constructor, which is one of the functions `group_count()`, `group_desc()`, or `group_shift()`. The ellipsis is necessary because when the template is used, we are able to pass arguments directly into the layer constructor. For example: + +```{r using a template} +tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template("example_template", RACE, by=ETHNIC) + ) %>% + build() %>% + kable() +``` + +Within `use_template()`, the first parameter is the template name. After that, we supply arguments as we normally would into `group_count()`, `group_desc()`, or `group_shift()`. Additionally, note that our formats have been applied just as they would be if we used `set_format_strings()` as specified in the template. Our template was applied, and the table was built with all of its settings applied appropriately. + +An additional feature of layer templates is that they act just as any other function would in a Tplyr layer. This means that they're also extensible and can be expanded on directly within a Tplyr table. For example: + +```{r extending a template} +tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template("example_template", RACE) %>% + add_total_row() + ) %>% + build() %>% + kable() +``` + +Here we show two things - first, that we called the template without the by variable argument from the previous example. This allows a template to have some flexibility depending on the context of its usage. Furthermore, we added the additional modifier function `add_total_row()`. In this example, we took the layer as constructed by the template and then modified that layer further. This may be useful if most but not all of a layer is reusable. The reusable portions can be put in a template, and the rest added using normal Tplyr syntax. + +## Templates With Parameters + +It's also possible to add interchangeable parameters into a layer template beyond the group constructor arguments. But this requires some special syntax. Consider the following template: + +```{r template with params} +new_layer_template("example_params", + group_count(...)
%>% + set_format_strings(f_str("xx (xx.x%)", n, pct)) %>% + set_order_count_method({sort_meth}) %>% + set_ordering_cols({sort_col}) + ) +``` + +In this example, we create a template similar to the first example. But now we add two more modifying functions, `set_order_count_method()` and `set_ordering_cols()`. Within these functions, we've supplied interchangeable parameters to the template function, which are `sort_meth` and `sort_col`. In a Tplyr layer template, these parameters are supplied using curly brackets (i.e. {}). + +To specify these arguments when using the template, we use the `use_template()` argument `add_params`. For example: + +```{r using params} +tplyr_table(adsl, TRT01P) %>% + add_layer( + use_template('example_params', RACE, add_params = + list( + sort_meth = "bycount", + sort_col = Placebo + )) + ) %>% + build() %>% + kable() +``` + +In the `add_params` parameter, you must supply a list. That list must also be named, where the element names (in this example, `sort_meth` and `sort_col`) match the parameter names in the template itself. If there's any mismatch between a template's parameters and the parameters provided to `add_params`, you will encounter an error. The values supplied to `add_params` are then exactly the arguments that you would supply to the matching field within the template (i.e. there's no extra quoting using `quo()` necessary to pass a symbol). + +# Viewing and Removing Templates + +If you want to view any available templates in your session, use the function `get_layer_templates()`. + +```{r view templates} +get_layer_templates() +``` + +You can view a specific template using `get_layer_template()`. + +```{r get a template} +get_layer_template("example_params") +``` + +Note that layer templates are of class `tplyr_layer_template`. They additionally carry the attribute `params` that specifies which parameters are available in the template, which can be seen in the output above. + +Finally, if you want to remove a layer template from your session, use the function `remove_layer_template()`. + +```{r remove a template} +remove_layer_template("example_params") +get_layer_templates() +```
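The same pattern extends to the other layer types. As a sketch - the template name, formats, and variables below are made up for illustration rather than taken from the package - a descriptive statistics layer for a demographics table could be templated once and reused across several target variables:

```r
library(Tplyr)
library(magrittr)

# Illustrative template for continuous demographics variables
new_layer_template(
  "demog_desc",
  group_desc(...) %>%
    set_format_strings(
      "n"         = f_str("xx", n),
      "Mean (SD)" = f_str("xx.x (xx.xx)", mean, sd),
      "Median"    = f_str("xx.x", median),
      "Min, Max"  = f_str("xx, xx", min, max),
      "Missing"   = f_str("xx", missing)
    )
)

# Each continuous variable then reuses the same configuration
tplyr_table(adsl, TRT01P) %>%
  add_layer(use_template("demog_desc", AGE)) %>%
  add_layer(use_template("demog_desc", HEIGHTBL)) %>%
  build()
```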
But what about after the results are produced? Summarizing data inevitably leads to new questions. Within clinical summaries, you may want to know which subjects experienced an adverse event, or why the descriptive statistics for a particular visit's lab summaries look abnormal. Normally, you'd write a query to recreate the data that led to that particular summary. Tplyr now allows you to immediately extract the input data or metadata that created an output result, thus providing traceability from the result back to the source. + +## Generating the Metadata + +Consider the following example: + +```{r table_creation} +t <- tplyr_table(adsl, TRT01P, where = SAFFL == "Y") %>% + add_layer( + group_count(RACE) + ) %>% + add_layer( + group_desc(AGE, where = EFFFL == "Y") + ) + +dat <- t %>% build(metadata=TRUE) + +kable(dat) +``` + +To trigger the creation of metadata, the `build()` function has a new argument `metadata`. By specifying `TRUE`, the underlying metadata within Tplyr are prepared in an extractable format. This is the only step a user needs to take for the metadata to be made available. + +When the `metadata` argument is used, a new column will be produced in the output dataframe called `row_id`. The `row_id` variable provides a persistent reference to a row of interest, even if the output dataframe is sorted. If you review `vignette("styled-table")`, note that we expect a certain amount of post processing and styling of the built data frame from Tplyr, to let you use whatever other packages you prefer. Because rows may be reordered during that post processing, this persistent reference ID is necessary. + +## Extracting The Input Source + +So, let's cut to the chase. The most likely way you would use this metadata is to pull out the source data that created a cell. For this, we've provided the function `get_meta_subset()`. The only information that you need is the `row_id` and column name of the result cell of interest. For example, looking at the result above, what if we want to know which 8 subjects in the Placebo group were Black or African American: + +```{r meta_subset} +get_meta_subset(t, 'c2_1', 'var1_Placebo') %>% + kable() +``` + +By using the `row_id` and column, the dataframe is pulled right out for us. Notice that `USUBJID` was included by default, even though there's no reference to the variable `USUBJID` anywhere in the `tplyr_table()`. This is because `get_meta_subset()` has an additional argument `add_cols` that allows you to specify additional columns you want included in the resulting dataframe, and it defaults to `USUBJID`. So let's say we additionally want to include the variable `SEX`: + +```{r add_vars} +get_meta_subset(t, 'c2_1', 'var1_Placebo', add_cols = vars(USUBJID, SEX)) %>% + kable() +``` + +Variables should be provided using `dplyr::vars()`, just like the `cols` argument on `tplyr_table()` and the `by` arguments in each layer type. + +As mentioned, the input source data can be extracted for any result cell created by Tplyr. So let's say we want to know the subjects relevant for the descriptive statistics around age in the Xanomeline High Dose group: + +```{r desc_stats} +get_meta_subset(t, 'd1_2', 'var1_Xanomeline High Dose') %>% + head(10) %>% + kable() +``` + +_Note: Trimmed for space_ + +Notice how the columns returned are different. First off, within the summary above, we pulled results from the descriptive statistics layer. The target variable for this layer was `AGE`, and as such `AGE` is returned in the resulting output.
Additionally, a layer level `where` argument was used to subset to `EFFFL == "Y"`, which leads to `EFFFL` being included in the output as well. + +## Extracting a Result Cell's Metadata + +To build the dataframe returned by `get_meta_subset()`, the metadata of the result cell first needs to be extracted. This metadata can be accessed directly using the function `get_meta_result()`. Using the last example of `get_meta_subset()` above: + +```{r tplyr_meta} +get_meta_result(t, 'd1_2', 'var1_Xanomeline High Dose') +``` + +The resulting output is a new Tplyr object called `tplyr_meta`. This is a container of the relevant metadata for a specific result. The object itself is a list with two elements: `names` and `filters`. + +The `names` element contains quosures for each variable relevant to a specific result. This will include the target variable, the `by` variables used on the layer, the `cols` variables used on the table, and all variables included in any filter condition relevant to creating the result. + +The `filters` element contains each filter condition (provided as calls) necessary to create a particular cell. This will include the table level `where` argument, the layer level `where` argument, the filter condition for the specific value of any `by` variable or `cols` variable necessary to create the cell, and similarly the filter for the treatment group of interest. + +The results are provided this way so that they can be unpacked directly into `dplyr` syntax when necessary, which is exactly what happens in `get_meta_subset()`. For example: + +```{r unpack} +m <- get_meta_result(t, 'd1_2', 'var1_Xanomeline High Dose') + +adsl %>% + filter(!!!m$filters) %>% + select(!!!m$names) %>% + head(10) %>% + kable() +``` + +_Note: Trimmed for space_ + +But - who says you can't let your imagination run wild? + +```{r to string print, eval=FALSE} +cat(c("adsl %>%\n", + " filter(\n ", + paste(purrr::map_chr(m$filters, ~ rlang::as_label(.)), collapse=",\n "), + ") %>%\n", + paste(" select(", paste(purrr::map_chr(m$names, rlang::as_label), collapse=", "), ")", sep="") +)) +``` + +``` +```{r to string content, results='asis', echo=FALSE} +cat(c("adsl %>%\n", + " filter(\n ", + paste(purrr::map_chr(m$filters, ~ rlang::as_label(.)), collapse=",\n "), + ") %>%\n", + paste(" select(", paste(purrr::map_chr(m$names, rlang::as_label), collapse=", "), ")", sep="") +)) +``` +``` + +## So, What Does This Get Me? + +So we can get the metadata behind a result cell, and we can get the exact records that produced it. You just need a row ID and a column name. But - what does that get you? You can query your tables - and that's great. But how do you _use_ that? + +The idea behind this is really to support [Shiny](https://shiny.rstudio.com/). Consider this minimal application. Click any of the result cells within the table and see what happens. + + +```{r, out.width=850, out.extra='style="border: 1px solid #464646;" allowfullscreen="" allow="autoplay"', echo=FALSE} +knitr::include_app("https://michael-stackhouse.shinyapps.io/Tplyr-shiny-demo/", height = "900px") +``` + +_Source code available [here](https://github.com/atorus-research/Tplyr-shiny-demo)_ + +_That's_ what this is all about. The persistent row_id and column selection enables you to use something like Shiny to automatically query a cell based on its position in a table. Using click events and a package like [reactable](https://glin.github.io/reactable/), you can pick up the row and column selected and pass that information into `get_meta_subset()`.
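To make that idea concrete, here is a rough sketch of the server-side wiring. This block is illustrative only - it is not the demo application's source code. It assumes the `t` and `dat` objects built earlier in this vignette are available, and it swaps the click-event wiring for two plain inputs:

```{r shiny_sketch, eval=FALSE}
library(shiny)

ui <- fluidPage(
  # Stand-ins for a click event: pick a row_id and a result column directly
  selectInput("row_id", "Row ID", choices = dat$row_id),
  selectInput("column", "Column", choices = names(dat)[startsWith(names(dat), "var1_")]),
  tableOutput("subset")
)

server <- function(input, output, session) {
  # Query the source data behind the selected result cell
  output$subset <- renderTable({
    get_meta_subset(t, input$row_id, input$column)
  })
}

shinyApp(ui, server)
```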
Once you get the resulting data frame, it's up to you what you do with it, and you have the world of Shiny at the tip of your fingers. diff --git a/vignettes/readme.Rmd b/vignettes/readme.Rmd deleted file mode 100644 index 9f516c47..00000000 --- a/vignettes/readme.Rmd +++ /dev/null @@ -1,99 +0,0 @@ ---- -title: "README" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{readme} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup, include=FALSE} -library(tidyverse) -library(magrittr) -library(Tplyr) -load("adae.Rdata") -load("adsl.Rdata") -``` - -# Tplyr Version 0.1.1 - -Welcome to Tplyr! This is the first full and stable release of our package. With this release comes a number of new enhancements, loads of new documentation, and our complete package qualification document. - -If you've been keeping up, here are the things that we've added since the Beta release in July: - -- Bug Fixes/Enhancements - - Count layers were re-factored to improve the execution efficiency - - Auto-precision now works without a `by` variable - - Several new assertions have been added to give clearer error messages - - Treatment groups within the population data will produce columns in the resulting build, even if no records exist for that treatment group in the target dataset - - Risk difference variable names will now populate properly when a `cols` argument is used - - Data frame attributes are cleaned prior to processing to prevent any merge/bind warnings during processing - - Total values within count layers are properly filled when the resulting count is 0 (largely impacts risk-difference calculations) -- Feature additions - - Shift layers are here! - - Flexibility when filling missing values has been enhanced for descriptive statistic layers - - Layers can now be named, and those names can be used in `get_numeric_data` and the new function `get_statistics_data` to get risk difference raw numbers. Data may also be filtered directly from both functions. - - Default formats can now be set via options or at the table level, which allows you to eliminate a great deal of redundant code - -As always, we welcome your feedback. If you spot a bug, would like to see a new feature, or if any documentation is unclear - submit an issue through GitHub right [here](https://github.com/atorus-research/Tplyr/issues). - -# What is Tplyr? - -[dplyr](https://dplyr.tidyverse.org/) from tidyverse is a grammar of data manipulation. So what does that allow you to do? It gives you, as a data analyst, the capability to easily and intuitively approach the problem of manipulating your data into an analysis ready form. `dplyr` conceptually breaks things down into verbs that allow you to focus on _what_ you want to do more than _how_ you have to do it. - -`Tplyr` is designed around a similar concept, but its focus is on building summary tables within the clinical world. In the pharmaceutical industry, a great deal of the data presented in the outputs we create are very similar. For the most part, most of these tables can be broken down into a few categories: - -- Counting for event based variables or categories -- Shifting, which is just counting a change in state with a 'from' and a 'to' -- Generating descriptive statistics around some continuous variable. 
- -For many of the tables that go into a clinical submission, at least when considering safety outputs, the tables are made up of a combination of these approaches. Consider a demographics table - and let's use an example from the PHUSE project Standard Analyses & Code Sharing - [Analyses & Displays Associated with Demographics, Disposition, and Medications in Phase 2-4 Clinical Trials and Integrated Summary Documents](https://phuse.s3.eu-central-1.amazonaws.com/Deliverables/Standard+Analyses+and+Code+Sharing/Analyses+%26+Displays+Associated+with+Demographics,+Disposition+and+Medication+in+Phase+2-4+Clinical+Trials+and+Integrated+Summary+Documents.pdf). - -![Demographics Table](./demo_table.png) - -When you look at this table, you can begin breaking this output down into smaller, redundant, components. These components can be viewed as 'layers', and the table as a whole is constructed by stacking the layers. The boxes in the image above represent how you can begin to conceptualize this. - -- First we have Sex, which is made up of n (%) counts. -- Next we have Age as a continuous variable, where we have a number of descriptive statistics, including n, mean, standard deviation, median, quartile 1, quartile 3, min, max, and missing values. -- After that we have age, but broken into categories - so this is once again n (%) values. -- Race - more counting, -- Ethnicity - more counting -- Weight - and we're back to descriptive statistics. - -So we have one table, with 6 summaries (7 including the next page, not shown) - but only 2 different approaches to summaries being performed. -In the same way that `dplyr` is a grammar of data manipulation, `Tplyr` aims to be a grammar of data summary. The goal of `Tplyr` is to allow you to program a summary table like you see it on the page, by breaking a larger problem into smaller 'layers', and combining them together like you see on the page. - -Enough talking - let's see some code. In these examples, we will be using data from the [PHUSE Test Data Factory]( https://advance.phuse.global/display/WEL/Test+Dataset+Factory) based on the [original pilot project submission package](https://www.cdisc.org/sdtmadam-pilot-project). Note: You can see our replication of the CDISC pilot using the PHUSE Test Data Factory data [here](https://github.com/atorus-research/CDISC_pilot_replication). - -```{r initial_demo} - -tplyr_table(adsl, TRT01P, where = SAFFL == "Y") %>% - add_layer( - group_desc(AGE, by = "Age (years)") - ) %>% - add_layer( - group_count(AGEGR1, by = "Age Categories n (%)") - ) %>% - build() - -``` - -## The TL;DR - -Here are some of the high level benefits of using `Tplyr`: - -- Easy construction of table data using an intuitive syntax -- Smart string formatting for your numbers that's easily specified by the user -- A great deal of flexibility in what is performed and how it's presented, without specifying hundreds of parameters -- Extensibility (in the future...) - we're going to open doors to allow you some level of customization. - -## Where Next - - diff --git a/vignettes/table.Rmd b/vignettes/table.Rmd index 2e6b085f..3d90c233 100644 --- a/vignettes/table.Rmd +++ b/vignettes/table.Rmd @@ -87,7 +87,7 @@ Note how in the above example, there are two new columns added to the data - `va ## Population Data -A last and very important aspect of table level properties in 'Tplyr' is the addition of a population dataset. In CDISC standards, datasets like `adae` only contain adverse events when the occur. 
This means that if a subject did not experience an adverse event, or did not experience an adverse event within the criteria that you're subsetting for, they don't appear in the dataset. When you're looking at the proportion of subject who experienced an adverse event compared to the total number of subjects in that cohort, `adae` itself leaves you no way to calculate that total - as the subjects won't exist in the data. +A last and very important aspect of table level properties in 'Tplyr' is the addition of a population dataset. In CDISC standards, datasets like `adae` only contain adverse events when they occur. This means that if a subject did not experience an adverse event, or did not experience an adverse event within the criteria that you're subsetting for, they don't appear in the dataset. When you're looking at the proportion of subjects who experienced an adverse event compared to the total number of subjects in that cohort, `adae` itself leaves you no way to calculate that total - as the subjects won't exist in the data. 'Tplyr' allows you to provide a separate population dataset to overcome this. Furthermore, you are also able to provide a separate `where` parameter for the population dataset and a population treatment variable (`pop_treat_var`), as variable names may differ between the datasets.
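To make this concrete, here is a rough sketch of how those pieces might fit together. This example is illustrative only and assumes `adae` and `adsl` are available with typical CDISC variable names (`TRTA`, `TRT01A`, `SAFFL`, `AEDECOD`):

```{r pop_data_sketch, eval=FALSE}
tplyr_table(adae, TRTA) %>%
  # Use adsl as the population dataset so denominators cover all subjects
  set_pop_data(adsl) %>%
  # The population data carries a differently named treatment variable
  set_pop_treat_var(TRT01A) %>%
  # And its own subset condition
  set_pop_where(SAFFL == "Y") %>%
  add_layer(
    group_count(AEDECOD)
  ) %>%
  build()
```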