From c7c34988ad2f6b74e610c62374cd40b7696d78a3 Mon Sep 17 00:00:00 2001 From: mattcieslak Date: Mon, 15 Sep 2025 14:40:30 -0400 Subject: [PATCH 1/9] first try --- NAMESPACE | 2 + R/ModelArray_Constructor.R | 215 ++++++++++++++++++++++- R/analyse.R | 240 ++++++++++++++++++++++++-- tests/testthat/test-ModelArray_wrap.R | 159 +++++++++++++++++ vignettes/wrap.Rmd | 182 +++++++++++++++++++ 5 files changed, 779 insertions(+), 19 deletions(-) create mode 100644 tests/testthat/test-ModelArray_wrap.R create mode 100644 vignettes/wrap.Rmd diff --git a/NAMESPACE b/NAMESPACE index 3bdcfd5..58316e9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,8 +4,10 @@ export("%>%") export(ModelArray) export(ModelArray.gam) export(ModelArray.lm) +export(ModelArray.wrap) export(analyseOneElement.gam) export(analyseOneElement.lm) +export(analyseOneElement.wrap) export(gen_gamFormula_contIx) export(gen_gamFormula_fxSmooth) export(numElementsTotal) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index f02aa56..c2649a6 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -374,6 +374,7 @@ analyseOneElement.lm <- function(i_element, var.terms, var.model, num.subj.lthr, num.stat.output = NULL, flag_initiate = FALSE, + on_error = "stop", ...) { values <- scalars(modelarray)[[scalar]][i_element, ] @@ -406,7 +407,40 @@ analyseOneElement.lm <- function(i_element, # onemodel <- stats::lm(formula, data = dat, ...) # onemodel <- stats::lm(formula, data = dat, weights = myWeights,...) - onemodel <- do.call(stats::lm, arguments_lm) + onemodel <- tryCatch({ + do.call(stats::lm, arguments_lm) + }, error = function(e) { + msg <- paste0("analyseOneElement.lm error at element ", i_element, ": ", conditionMessage(e)) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.lm_error_initiate = TRUE), class = "lm_error")) + } else { + return(structure(list(.lm_error_runtime = TRUE), class = "lm_error")) + } + } + stop(e) + }) + + if (inherits(onemodel, "lm_error")) { + if (flag_initiate == TRUE) { + toreturn <- list( + column_names = NaN, + list.terms = NaN + ) + return(toreturn) + } else { + onerow <- c( + i_element - 1, + rep(NaN, (num.stat.output - 1)) + ) + return(onerow) + } + } # explicitly passing arguments into lm, to avoid error of argument "weights" onemodel.tidy <- onemodel %>% broom::tidy() @@ -583,6 +617,7 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc var.smoothTerms, var.parametricTerms, var.model, num.subj.lthr, num.stat.output = NULL, flag_initiate = FALSE, flag_sse = FALSE, + on_error = "stop", ...) { values <- scalars(modelarray)[[scalar]][i_element, ] @@ -604,7 +639,42 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc arguments$data <- dat # explicitly passing arguments into command, to avoid error of argument "weights" - onemodel <- do.call(mgcv::gam, arguments) + onemodel <- tryCatch({ + do.call(mgcv::gam, arguments) + }, error = function(e) { + msg <- paste0("analyseOneElement.gam error at element ", i_element, ": ", conditionMessage(e)) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.gam_error_initiate = TRUE), class = "gam_error")) + } else { + return(structure(list(.gam_error_runtime = TRUE), class = "gam_error")) + } + } + stop(e) + }) + + if (inherits(onemodel, "gam_error")) { + if (flag_initiate == TRUE) { + toreturn <- list( + column_names = NaN, + list.smoothTerms = NaN, + list.parametricTerms = NaN, + sp.criterion.attr.name = NaN + ) + return(toreturn) + } else { + onerow <- c( + i_element - 1, + rep(NaN, (num.stat.output - 1)) + ) + return(onerow) + } + } onemodel.tidy.smoothTerms <- onemodel %>% broom::tidy(parametric = FALSE) onemodel.tidy.parametricTerms <- onemodel %>% broom::tidy(parametric = TRUE) @@ -842,6 +912,147 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc +#' Run a user-supplied function for one element +#' +#' @description +#' `analyseOneElement.wrap` runs a user-supplied function `FUN` on a single element's data. +#' It prepares the per-element data by attaching the element values as a new column named by `scalar` +#' to the provided `phenotypes` data.frame, then calls `FUN(data = dat, ...)`. +#' +#' @details +#' The user-supplied `FUN` should return either a one-row data.frame/tibble, a named list, or a named vector. +#' The result will be coerced to a one-row tibble and combined into the final results matrix across elements. +#' +#' @param i_element An integer, the i_th element, starting from 1. +#' @param user_fun A function that accepts at least an argument named `data` (the per-element +#' `phenotypes` with the response column appended) and returns a one-row data.frame/tibble, +#' named list, or named vector. +#' @param modelarray ModelArray class +#' @param phenotypes A data.frame of the cohort with columns of independent variables and covariates +#' @param scalar A character. The name of the element-wise scalar to be analysed +#' @param num.subj.lthr The minimal number of subjects with valid value in input h5 file +#' @param num.stat.output The number of output stat metrics (including `element_id`). +#' Required when `flag_initiate = TRUE`. +#' @param flag_initiate TRUE or FALSE, whether this is to initiate the new analysis to get column names +#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs while +#' executing the user function, choose whether to stop, skip returning all-NaN values for this +#' element, or drop into `browser()` (if interactive) then skip. Default: "stop". +#' @param ... Additional arguments forwarded to `FUN` +#' +#' @return If `flag_initiate==TRUE`, returns a list with `column_names`. +#' If `flag_initiate==FALSE`, returns a numeric vector representing the one-row result for this element +#' with `element_id` as the first value. +#' @export +#' @importFrom dplyr %>% +#' @import tibble +analyseOneElement.wrap <- function(i_element, user_fun, modelarray, phenotypes, scalar, + num.subj.lthr, num.stat.output = NULL, + flag_initiate = FALSE, + on_error = "stop", + ...) { + values <- scalars(modelarray)[[scalar]][i_element, ] + + ## check number of subjects with (in)valid values: + flag_sufficient <- NULL + num.subj.valid <- length(values[is.finite(values)]) + if (num.subj.valid > num.subj.lthr) { + flag_sufficient <- TRUE + } else { + flag_sufficient <- FALSE + } + + if (flag_sufficient == TRUE) { + dat <- phenotypes + dat[[scalar]] <- values + + arguments <- list(...) + arguments$data <- dat + + # Execute user function with error handling + result <- tryCatch({ + do.call(user_fun, arguments) + }, error = function(e) { + msg <- paste0("analyseOneElement.wrap error at element ", i_element, ": ", conditionMessage(e)) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.wrap_error_initiate = TRUE), class = "wrap_error")) + } else { + return(structure(list(.wrap_error_runtime = TRUE), class = "wrap_error")) + } + } + stop(e) + }) + + # On error in user function, return NaNs according to flag + if (inherits(result, "wrap_error")) { + if (flag_initiate == TRUE) { + toreturn <- list( + column_names = NaN + ) + return(toreturn) + } else { + onerow <- c( + i_element - 1, + rep(NaN, (num.stat.output - 1)) + ) + return(onerow) + } + } + + # Coerce to one-row tibble + if ("data.frame" %in% class(result)) { + onerow_tbl <- tibble::as_tibble(result) + if (nrow(onerow_tbl) != 1) { + stop("The user function must return a one-row data.frame/tibble, a named list, or a named vector.") + } + } else if (is.list(result)) { + onerow_tbl <- tibble::as_tibble_row(result) + } else if (is.atomic(result)) { + if (is.null(names(result))) { + names(result) <- paste0("v", seq_along(result)) + } + onerow_tbl <- tibble::as_tibble_row(as.list(result)) + } else { + stop(paste0( + "Unsupported return type from user function. ", + "Return a one-row data.frame/tibble, named list, or named vector." + )) + } + + # add element_id as first column + colnames.temp <- colnames(onerow_tbl) + onerow_tbl <- onerow_tbl %>% tibble::add_column(element_id = i_element - 1, .before = colnames.temp[1]) + + if (flag_initiate == TRUE) { + toreturn <- list( + column_names = colnames(onerow_tbl) + ) + toreturn + } else { + # return numeric vector to be consistent with other analysers + as.numeric(onerow_tbl) + } + } else { # insufficient subjects + if (flag_initiate == TRUE) { + toreturn <- list( + column_names = NaN + ) + toreturn + } else { + onerow <- c( + i_element - 1, + rep(NaN, (num.stat.output - 1)) + ) + onerow + } + } +} + #' Write outputs from element-wise statistical analysis to the HDF5 file. #' #' @description diff --git a/R/analyse.R b/R/analyse.R index 2dd633c..a7b1abb 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -84,7 +84,8 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU var.model = c("adj.r.squared", "p.value"), correct.p.value.terms = c("fdr"), correct.p.value.model = c("fdr"), num.subj.lthr.abs = 10, num.subj.lthr.rel = 0.2, - verbose = TRUE, pbar = TRUE, n_cores = 1, ...) { + verbose = TRUE, pbar = TRUE, n_cores = 1, + on_error = "stop", ...) { # data type assertions if (class(data) != "ModelArray") { stop("data's class is not ModelArray!") @@ -315,7 +316,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU formula, data, phenotypes, scalar, var.terms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = NULL, - flag_initiate = TRUE, + flag_initiate = TRUE, on_error = on_error, ... ) if (is.nan(outputs_initiator$column_names)[1]) { # not sufficient subjects @@ -334,7 +335,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU formula, data, phenotypes, scalar, var.terms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = NULL, - flag_initiate = TRUE, + flag_initiate = TRUE, on_error = on_error, ... ) if (!(is.nan(outputs_initiator$column_names)[1])) { @@ -368,7 +369,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU formula, data, phenotypes, scalar, var.terms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = NULL, - flag_initiate = TRUE, + flag_initiate = TRUE, on_error = on_error, ... ) if (!(is.nan(outputs_initiator$column_names)[1])) { @@ -413,7 +414,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU formula, data, phenotypes, scalar, var.terms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, - flag_initiate = FALSE, + flag_initiate = FALSE, on_error = on_error, ... ) } else { @@ -425,7 +426,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU formula, data, phenotypes, scalar, var.terms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, - flag_initiate = FALSE, + flag_initiate = FALSE, on_error = on_error, ... ) } @@ -437,7 +438,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU formula, data, phenotypes, scalar, var.terms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, - flag_initiate = FALSE, + flag_initiate = FALSE, on_error = on_error, ... ) } else { @@ -446,7 +447,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU formula, data, phenotypes, scalar, var.terms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, - flag_initiate = FALSE, + flag_initiate = FALSE, on_error = on_error, ... ) } @@ -513,8 +514,6 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU df_out # return } - - #' Run GAM for element-wise data #' #' @description @@ -949,7 +948,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N formula, data, phenotypes, scalar, var.smoothTerms, var.parametricTerms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = NULL, - flag_initiate = TRUE, flag_sse = flag_sse, + flag_initiate = TRUE, flag_sse = flag_sse, on_error = on_error, ... ) if (is.nan(outputs_initiator$column_names)[1]) { # not sufficient subjects @@ -975,7 +974,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N formula, data, phenotypes, scalar, var.smoothTerms, var.parametricTerms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = NULL, - flag_initiate = TRUE, flag_sse = flag_sse, + flag_initiate = TRUE, flag_sse = flag_sse, on_error = on_error, ... ) if (!(is.nan(outputs_initiator$column_names)[1])) { @@ -1009,7 +1008,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N formula, data, phenotypes, scalar, var.smoothTerms, var.parametricTerms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = NULL, - flag_initiate = TRUE, flag_sse = flag_sse, + flag_initiate = TRUE, flag_sse = flag_sse, on_error = on_error, ... ) if (!(is.nan(outputs_initiator$column_names)[1])) { @@ -1061,7 +1060,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N formula, data, phenotypes, scalar, var.smoothTerms, var.parametricTerms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, - flag_initiate = FALSE, flag_sse = flag_sse, + flag_initiate = FALSE, flag_sse = flag_sse, on_error = on_error, ... ) } else { @@ -1073,7 +1072,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N formula, data, phenotypes, scalar, var.smoothTerms, var.parametricTerms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, - flag_initiate = FALSE, flag_sse = flag_sse, + flag_initiate = FALSE, flag_sse = flag_sse, on_error = on_error, ... ) } @@ -1085,7 +1084,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N formula, data, phenotypes, scalar, var.smoothTerms, var.parametricTerms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, - flag_initiate = FALSE, flag_sse = flag_sse, + flag_initiate = FALSE, flag_sse = flag_sse, on_error = on_error, ... ) } else { @@ -1094,7 +1093,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N formula, data, phenotypes, scalar, var.smoothTerms, var.parametricTerms, var.model, num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, - flag_initiate = FALSE, flag_sse = flag_sse, + flag_initiate = FALSE, flag_sse = flag_sse, on_error = on_error, ... ) } @@ -1321,3 +1320,210 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N ### return df_out } + + +#' Run a user-supplied function for element-wise data +#' +#' @description +#' `ModelArray.wrap` runs a user-supplied function `FUN` for each requested element and +#' returns a tibble/data.frame of results combined across elements. +#' +#' @details +#' This provides a generic framework reusing ModelArray's per-element looping, alignment, +#' subject-thresholding, and parallelization. The user function will be called as +#' `FUN(data = dat, ...)` where `dat` is `phenotypes` with the response column named `scalar` +#' appended for the current element. The return value from `FUN` for a single element must be +#' either a one-row data.frame/tibble, a named list, or a named atomic vector. The column +#' names from the first successful element determine the final schema. +#' +#' Note: `ModelArray.wrap` never performs any p-value corrections or modifications. +#' If you need adjusted p-values (e.g., FDR), implement them inside your provided `FUN`. +#' +#' @param FUN A function that accepts at least `data` and returns a one-row data.frame/tibble, +#' a named list, or a named vector of results for that element. +#' @param data ModelArray class +#' @param phenotypes A data.frame with cohort covariates; must contain column `source_file`. +#' It must match `sources(data)[[scalar]]` order and contents (reordered if needed). +#' @param scalar Character, the scalar name to analyze +#' @param element.subset Optional integer vector of element indices (1-based); defaults to all +#' @param num.subj.lthr.abs Integer lower threshold for subjects with finite values (default 10) +#' @param num.subj.lthr.rel Relative lower threshold (0-1) (default 0.2) +#' @param verbose TRUE/FALSE to print messages +#' @param pbar TRUE/FALSE to show progress bar +#' @param n_cores Positive integer number of CPU cores +#' @param ... Additional arguments forwarded to `FUN` +#' @return Tibble/data.frame with one row per element and first column `element_id` +#' @importFrom dplyr %>% +#' @import doParallel +#' @import tibble +#' @importFrom glue glue +#' @export +ModelArray.wrap <- function(FUN, data, phenotypes, scalar, element.subset = NULL, + num.subj.lthr.abs = 10, num.subj.lthr.rel = 0.2, + verbose = TRUE, pbar = TRUE, n_cores = 1, + on_error = "stop", ...) { + # data type assertions + if (class(data) != "ModelArray") { + stop("data's class is not ModelArray!") + } + + ## element.subset default and checks + if (is.null(element.subset)) { + num.element.total <- numElementsTotal(modelarray = data, scalar_name = scalar) + element.subset <- 1:num.element.total + } + if (min(element.subset) < 1) stop("Minimal value in element.subset should >= 1") + if (max(element.subset) > nrow(scalars(data)[[scalar]])) { + stop(paste0( + "Maximal value in element.subset should <= number of elements = ", + as.character(nrow(scalars(data)[[scalar]])) + )) + } + if (class(element.subset) != "integer") stop("Please enter integers for element.subset!") + + ### Align phenotypes to sources + sources.modelarray <- sources(data)[[scalar]] + sources.phenotypes <- phenotypes[["source_file"]] + if (is.null(sources.phenotypes)) stop("Did not find column 'source_file' in argument 'phenotypes'. Please check!") + if (length(sources.modelarray) != length(sources.phenotypes)) { + stop(paste0( + "The length of source file list from phenotypes's column 'source_file' ", + "is not the same as that in ModelArray 'data'! Please check out! ", + "The latter one can be accessed by: sources(data)[[scalar]]" + )) + } + if (length(sources.modelarray) != length(unique(sources.modelarray))) { + stop( + paste0( + "The source files in ModelArray 'data' are not unique! Please check out! ", + "It can be accessed by: sources(data)[[scalar]]" + ) + ) + } + if (length(sources.phenotypes) != length(unique(sources.phenotypes))) { + stop( + paste0( + "The source files from phenotypes's column 'source_file' are not unique! ", + "Please check out and remove the duplicated one!" + ) + ) + } + if (!identical(sources.modelarray, sources.phenotypes)) { + if ((all(sources.modelarray %in% sources.phenotypes)) && (all(sources.phenotypes %in% sources.modelarray))) { + reorder_idx <- match(sources.modelarray, sources.phenotypes) + phenotypes <- phenotypes[reorder_idx, ] + row.names(phenotypes) <- NULL + if (!identical(phenotypes[["source_file"]], sources.modelarray)) { + stop("matching source file names were not successful...") + } + } else { + stop(paste0( + "phenotypes's column 'source_file' have different element(s) from source file list", + " in ModelArray 'data'! Please check out! ", + "The latter one can be accessed by: sources(data)[[scalar]]" + )) + } + } + ### thresholds + num.subj.total <- nrow(phenotypes) + num.subj.lthr <- max(num.subj.total * num.subj.lthr.rel, num.subj.lthr.abs) + + ### initiate to get column names + if (verbose) { + message(glue::glue("Running element-wise user function for {scalar}")) + message(glue::glue("initiating....")) + } + num.elements.total <- numElementsTotal(modelarray = data, scalar_name = scalar) + i_element_try <- floor(num.elements.total / 2) + outputs_initiator <- analyseOneElement.wrap( + i_element = i_element_try, + user_fun = FUN, modelarray = data, phenotypes = phenotypes, scalar = scalar, + num.subj.lthr = num.subj.lthr, num.stat.output = NULL, + flag_initiate = TRUE, on_error = on_error, + ... + ) + if (is.nan(outputs_initiator$column_names)[1]) { + message("No sufficient subjects at middle element; trying others for initiation....") + for (i_element_temp in (i_element_try + 1):num.elements.total) { + outputs_initiator <- analyseOneElement.wrap( + i_element = i_element_temp, + user_fun = FUN, modelarray = data, phenotypes = phenotypes, scalar = scalar, + num.subj.lthr = num.subj.lthr, num.stat.output = NULL, + flag_initiate = TRUE, on_error = on_error, + ... + ) + if (!(is.nan(outputs_initiator$column_names)[1])) break + } + if ((i_element_temp == num.elements.total) && (is.nan(outputs_initiator$column_names)[1])) { + message("Trying from the beginning for initiation....") + for (i_element_temp in 1:(i_element_try - 1)) { + outputs_initiator <- analyseOneElement.wrap( + i_element = i_element_temp, + user_fun = FUN, modelarray = data, phenotypes = phenotypes, scalar = scalar, + num.subj.lthr = num.subj.lthr, num.stat.output = NULL, + flag_initiate = TRUE, on_error = on_error, + ... + ) + if (!(is.nan(outputs_initiator$column_names)[1])) break + } + if ((i_element_temp == (i_element_try - 1)) && (is.nan(outputs_initiator$column_names)[1])) { + stop(paste0( + "Have tried all elements, but there is no element with sufficient subjects with valid, ", + "finite h5 scalar values (i.e. not NaN or NA, not infinite). ", + "Please check thresholds or masks!" + )) + } + } + } + + column_names <- outputs_initiator$column_names + num.stat.output <- length(column_names) + + if (verbose) message(glue::glue("looping across elements....")) + + if (n_cores > 1) { + if (pbar) { + fits <- pbmcapply::pbmclapply(element.subset, + analyseOneElement.wrap, + mc.cores = n_cores, + user_fun = FUN, modelarray = data, phenotypes = phenotypes, scalar = scalar, + num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, + flag_initiate = FALSE, on_error = on_error, + ... + ) + } else { + fits <- parallel::mclapply(element.subset, + analyseOneElement.wrap, + mc.cores = n_cores, + user_fun = FUN, modelarray = data, phenotypes = phenotypes, scalar = scalar, + num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, + flag_initiate = FALSE, on_error = on_error, + ... + ) + } + } else { + if (pbar) { + fits <- pbapply::pblapply(element.subset, + analyseOneElement.wrap, + user_fun = FUN, modelarray = data, phenotypes = phenotypes, scalar = scalar, + num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, + flag_initiate = FALSE, on_error = on_error, + ... + ) + } else { + fits <- lapply(element.subset, + analyseOneElement.wrap, + user_fun = FUN, modelarray = data, phenotypes = phenotypes, scalar = scalar, + num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, + flag_initiate = FALSE, on_error = on_error, + ... + ) + } + } + + df_out <- do.call(rbind, fits) + df_out <- as.data.frame(df_out) + colnames(df_out) <- column_names + + df_out +} diff --git a/tests/testthat/test-ModelArray_wrap.R b/tests/testthat/test-ModelArray_wrap.R new file mode 100644 index 0000000..d44d0eb --- /dev/null +++ b/tests/testthat/test-ModelArray_wrap.R @@ -0,0 +1,159 @@ +test_that("ModelArray.wrap reproduces ModelArray.lm with a custom FUN", { + h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray") + csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray") + + modelarray <- ModelArray(h5_path, scalar_types = c("FD"), analysis_names = c("my_analysis")) + phenotypes <- read.csv(csv_path) + scalar_name <- "FD" + element.subset <- 1:10 + + # Baseline using built-in lm wrapper + res_lm <- ModelArray.lm( + FD ~ age + sex, + data = modelarray, + phenotypes = phenotypes, + scalar = scalar_name, + element.subset = element.subset, + n_cores = 1, + pbar = FALSE + ) + + # User function that mimics ModelArray.lm defaults + my_lm_fun <- function(data, formula) { + fit <- stats::lm(formula = formula, data = data) + td <- broom::tidy(fit) + if (nrow(td) > 0) { + td$term[td$term == "(Intercept)"] <- "Intercept" + td <- td[, intersect(colnames(td), c("term", "estimate", "statistic", "p.value"))] + } + gl <- broom::glance(fit) + gl$term <- "model" + gl <- gl[, intersect(colnames(gl), c("term", "adj.r.squared", "p.value"))] + + if (nrow(td) > 0) { + td_w <- tidyr::pivot_wider( + td, + names_from = term, + values_from = c(estimate, statistic, p.value), + names_glue = "{term}.{.value}" + ) + } else { + td_w <- td + } + gl_w <- tidyr::pivot_wider( + gl, + names_from = term, + values_from = c(adj.r.squared, p.value), + names_glue = "{term}.{.value}" + ) + out <- dplyr::bind_cols(td_w, gl_w) + + # Add FDR p-value columns to match ModelArray.lm defaults + term_names <- gsub( + "\\.estimate$|\\.statistic$|\\.p.value$", + "", + grep("\\.estimate$|\\.statistic$|\\.p.value$", names(out), value = TRUE) + ) + term_names <- unique(term_names[term_names != "model"]) + for (nm in term_names) { + pcol <- paste0(nm, ".p.value") + if (pcol %in% names(out)) out[[paste0(pcol, ".fdr")]] <- stats::p.adjust(out[[pcol]], method = "fdr") + } + if ("model.p.value" %in% names(out)) { + out[["model.p.value.fdr"]] <- stats::p.adjust(out[["model.p.value"]], method = "fdr") + } + + tibble::as_tibble(out) + } + + res_wrap <- ModelArray.wrap( + FUN = my_lm_fun, + data = modelarray, + phenotypes = phenotypes, + scalar = scalar_name, + element.subset = element.subset, + n_cores = 1, + pbar = FALSE, + formula = FD ~ age + sex + ) + + # Same element_id and shape + expect_equal(res_wrap$element_id, res_lm$element_id) + expect_true(is.data.frame(res_wrap)) + + # Compare on the intersection of columns (names may differ in ordering), + # excluding FDR columns (which are applied across all elements in ModelArray.lm) + common_cols <- intersect(colnames(res_lm), colnames(res_wrap)) + common_cols <- setdiff(common_cols, grep("\\.p.value.fdr$", common_cols, value = TRUE)) + # Ensure we have a reasonable overlap (at least the default stats) + required_patterns <- c( + "^Intercept\\.estimate$", "^age\\.estimate$", "^sex.*\\.estimate$", + "^Intercept\\.statistic$", "^age\\.statistic$", "^sex.*\\.statistic$", + "^Intercept\\.p\\.value$", "^age\\.p\\.value$", "^sex.*\\.p\\.value$", + "^model\\.adj\\.r\\.squared$", "^model\\.p\\.value$" + ) + expect_true(all(sapply(required_patterns, function(p) any(grepl(p, common_cols))))) + + expect_equal(res_wrap[, common_cols], res_lm[, common_cols], tolerance = 1e-8) + + rhdf5::h5closeAll() +}) +test_that("ModelArray.wrap runs simple group means function and writes outputs", { + h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray") + csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray") + + modelarray <- ModelArray(h5_path, scalar_types = c("FD"), analysis_names = c("my_analysis")) + phenotypes <- read.csv(csv_path) + scalar_name <- "FD" + element.subset <- 1:10 + + my_group_means_fun <- function(data, group_var, response_name) { + if (!group_var %in% names(data)) stop("group_var not found in data") + if (!response_name %in% names(data)) stop("response_name not found in data") + df <- data + vals <- df[[response_name]] + keep <- is.finite(vals) + df <- df[keep, , drop = FALSE] + vals <- df[[response_name]] + grp <- df[[group_var]] + levs <- sort(unique(grp)) + out <- list() + for (lv in levs) { + out[[paste0("mean_", response_name, "_", group_var, "_", lv)]] <- mean(vals[grp == lv], na.rm = TRUE) + } + if (length(levs) >= 2) { + out[[paste0("diff_mean_", response_name, "_", group_var, "_", levs[2], "_minus_", levs[1])]] <- + out[[paste0("mean_", response_name, "_", group_var, "_", levs[2])]] - + out[[paste0("mean_", response_name, "_", group_var, "_", levs[1])]] + } + tibble::as_tibble(out) + } + + res_group <- ModelArray.wrap( + FUN = my_group_means_fun, + data = modelarray, + phenotypes = phenotypes, + scalar = scalar_name, + element.subset = element.subset, + n_cores = 1, + pbar = FALSE, + group_var = "sex", + response_name = "FD" + ) + + expect_true(is.data.frame(res_group)) + expect_equal(res_group$element_id, 0:(length(element.subset) - 1)) + + # Expected columns exist (based on available levels in phenotypes$sex) + levs <- sort(unique(phenotypes$sex)) + expected_cols <- paste0("mean_", scalar_name, "_sex_", levs) + expect_true(all(expected_cols %in% colnames(res_group))) + if (length(levs) >= 2) { + expect_true(paste0("diff_mean_", scalar_name, "_sex_", levs[2], "_minus_", levs[1]) %in% colnames(res_group)) + } + + # Values should be finite (no NAs introduced by wrapper mechanics) + expect_true(all(is.finite(as.matrix(res_group[, setdiff(colnames(res_group), "element_id")])))) + + rhdf5::h5closeAll() +}) diff --git a/vignettes/wrap.Rmd b/vignettes/wrap.Rmd new file mode 100644 index 0000000..6400e2f --- /dev/null +++ b/vignettes/wrap.Rmd @@ -0,0 +1,182 @@ +--- +title: "Writing custom element-wise analyses with ModelArray.wrap" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Writing custom element-wise analyses with ModelArray.wrap} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +## Overview + +`ModelArray.wrap` lets you run your own per-element analysis function over all elements in a `ModelArray`, reusing the same alignment, subject-threshold checks, and parallel looping machinery as `ModelArray.lm` and `ModelArray.gam`. + +- You provide a function `FUN` that operates on a single element by receiving a `data` frame (the cohort `phenotypes` with an added response column named by `scalar`). +- `FUN` should return a one-row data.frame/tibble, a named list, or a named vector. The column names from the first successful element define the output schema. +- The result is a combined data.frame where the first column is `element_id` (zero-based). + +This vignette shows how to write a `FUN` that produces outputs equivalent to the defaults of `ModelArray.lm`. + +Important: `ModelArray.wrap` does not automatically adjust or otherwise modify p-values. If you want adjusted p-values (e.g., FDR), compute them inside your custom function `FUN` (as shown below for FDR). + +## Example: Using ModelArray.wrap to reproduce ModelArray.lm + +Below, we define `my_lm_fun()` that: + +- fits `stats::lm(formula, data=data)`; +- extracts term-level stats via `broom::tidy()` and selects the default fields used by `ModelArray.lm` (`estimate`, `statistic`, `p.value`); +- extracts model-level stats via `broom::glance()` and selects defaults (`adj.r.squared`, `p.value`); +- pivots term and model summaries into a single one-row tibble with column names like `Age.estimate`, `Age.statistic`, `Age.p.value`, `model.adj.r.squared`, `model.p.value`; +- adds FDR-corrected p-values for terms and the model to match `ModelArray.lm` defaults. + +```r +#' One-element LM equivalent used by ModelArray.wrap +my_lm_fun <- function(data, formula) { + # Fit lm + fit <- stats::lm(formula = formula, data = data) + + # Term-level stats + td <- broom::tidy(fit) + if (nrow(td) > 0) { + td$term[td$term == "(Intercept)"] <- "Intercept" + td <- td[, intersect(colnames(td), c("term", "estimate", "statistic", "p.value"))] + } + + # Model-level stats + gl <- broom::glance(fit) + gl$term <- "model" + gl <- gl[, intersect(colnames(gl), c("term", "adj.r.squared", "p.value"))] + + # Pivot to one row per element + library(tidyr) + library(dplyr) + + td_w <- if (nrow(td) > 0) { + pivot_wider(td, names_from = term, values_from = c(estimate, statistic, p.value), + names_glue = "{term}.{.value}") + } else td + + gl_w <- pivot_wider(gl, names_from = term, values_from = c(adj.r.squared, p.value), + names_glue = "{term}.{.value}") + + out <- bind_cols(td_w, gl_w) + + # Add FDR-corrected p-values to mirror ModelArray.lm defaults + term_names <- gsub("\\.estimate$|\\.statistic$|\\.p.value$", "", grep("\\.estimate$|\\.statistic$|\\.p.value$", names(out), value = TRUE)) + term_names <- unique(term_names[term_names != "model"]) # exclude model + + for (nm in term_names) { + pcol <- paste0(nm, ".p.value") + if (pcol %in% names(out)) { + out[[paste0(pcol, ".fdr")]] <- stats::p.adjust(out[[pcol]], method = "fdr") + } + } + if ("model.p.value" %in% names(out)) { + out[["model.p.value.fdr"]] <- stats::p.adjust(out[["model.p.value"]], method = "fdr") + } + + # Ensure it's a one-row tibble + tibble::as_tibble(out) +} +``` + +With the function defined, we can run it across elements with `ModelArray.wrap`. The example mirrors the linear model in the walkthrough vignette and runs on the first 100 elements for speed. + +```r +library(ModelArray) + +# Paths and data (adapt these to your setup) +h5_path <- "~/Desktop/myProject/demo_FDC_n100.h5" +csv_path <- "~/Desktop/myProject/cohort_FDC_n100.csv" +modelarray <- ModelArray(h5_path, scalar_types = c("FDC")) +phenotypes <- read.csv(csv_path) + +# Same formula used by ModelArray.lm +formula_lm <- FDC ~ Age + sex + dti64MeanRelRMS + +# Run user-wrapped LM on the first 100 elements +res_wrap_lm <- ModelArray.wrap( + FUN = my_lm_fun, + data = modelarray, + phenotypes = phenotypes, + scalar = "FDC", + element.subset = 1:100, + formula = formula_lm +) + +head(res_wrap_lm) +``` + +Optionally, save the results back into the HDF5 file: + +```r +writeResults(h5_path, df.output = res_wrap_lm, analysis_name = "results_wrap_lm") +``` + +## Notes and tips + +- The schema (column names) is inferred from the first element that has sufficient valid subjects. Ensure your `FUN` returns the same shape and names for all elements. +- `ModelArray.wrap` forwards `...` to your `FUN`. Use this to pass parameters like `formula` or other options. +- Subject thresholds: elements with insufficient finite values are automatically filled with `NaN` outputs, keeping `element_id` for alignment with other results. +- Parallelization: set `n_cores > 1` to enable multi-core execution; set `pbar = TRUE` to show a progress bar. + +## Example: Computing simple group means + +You can also use `ModelArray.wrap` to compute very simple statistics, such as group means of the response for each element. The example below computes the mean value of the response (the `scalar` column) within each level of a grouping variable (for example, `sex`), and also reports the difference between the first two levels if present. + +```r +#' One-element group means across levels of a grouping variable +my_group_means_fun <- function(data, group_var, response_name) { + if (!group_var %in% names(data)) stop("group_var not found in data") + if (!response_name %in% names(data)) stop("response_name not found in data") + + df <- data + vals <- df[[response_name]] + # keep finite values only + keep <- is.finite(vals) + df <- df[keep, , drop = FALSE] + vals <- df[[response_name]] + grp <- df[[group_var]] + + levs <- sort(unique(grp)) + out <- list() + for (lv in levs) { + out[[paste0("mean_", response_name, "_", group_var, "_", lv)]] <- mean(vals[grp == lv], na.rm = TRUE) + } + if (length(levs) >= 2) { + out[[paste0("diff_mean_", response_name, "_", group_var, "_", levs[2], "_minus_", levs[1])]] <- + out[[paste0("mean_", response_name, "_", group_var, "_", levs[2])]] - + out[[paste0("mean_", response_name, "_", group_var, "_", levs[1])]] + } + + tibble::as_tibble(out) +} +``` + +Run across elements (here on the first 100 for speed), and save results: + +```r +library(ModelArray) + +# Paths and data (adapt these to your setup) +h5_path <- "~/Desktop/myProject/demo_FDC_n100.h5" +csv_path <- "~/Desktop/myProject/cohort_FDC_n100.csv" +modelarray <- ModelArray(h5_path, scalar_types = c("FDC")) +phenotypes <- read.csv(csv_path) + +res_group_means <- ModelArray.wrap( + FUN = my_group_means_fun, + data = modelarray, + phenotypes = phenotypes, + scalar = "FDC", + element.subset = 1:100, + group_var = "sex", + response_name = "FDC" +) + +head(res_group_means) + +writeResults(h5_path, df.output = res_group_means, analysis_name = "results_group_means") +``` + + From e8b4c58e66351b004b0f1bc7c7d758692a4d6804 Mon Sep 17 00:00:00 2001 From: mattcieslak Date: Mon, 15 Sep 2025 16:35:51 -0400 Subject: [PATCH 2/9] styler --- R/ModelArray_Constructor.R | 296 +++++++++++++++++++++---------------- 1 file changed, 166 insertions(+), 130 deletions(-) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index c2649a6..d4aa652 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -32,7 +32,8 @@ ModelArray <- setClass( #' @return The results slot of the object #' @aliases results #' @keywords internal -setGeneric("results", function(x, ...) standardGeneric("results")) +setGeneric("results", function(x, ...) + standardGeneric("results")) #' Access the scalars slot of an object #' @@ -47,7 +48,8 @@ setGeneric("results", function(x, ...) standardGeneric("results")) #' @return The scalars slot of the object #' @aliases scalars #' @keywords internal -setGeneric("scalars", function(x, ...) standardGeneric("scalars")) +setGeneric("scalars", function(x, ...) + standardGeneric("scalars")) #' Access the sources slot of an object #' @@ -61,7 +63,8 @@ setGeneric("scalars", function(x, ...) standardGeneric("scalars")) #' @return The sources slot of the object #' @aliases sources #' @keywords internal -setGeneric("sources", function(x) standardGeneric("sources")) +setGeneric("sources", function(x) + standardGeneric("sources")) #' Access the results slot of a ModelArray object @@ -77,7 +80,8 @@ setGeneric("sources", function(x) standardGeneric("sources")) #' @return The results slot of the ModelArray object #' @keywords internal #' @export -setMethod("results", "ModelArray", function(x, ...) x@results) +setMethod("results", "ModelArray", function(x, ...) + x@results) #' Access the scalars slot of a ModelArray object #' @@ -92,7 +96,8 @@ setMethod("results", "ModelArray", function(x, ...) x@results) #' @return The scalars slot of the ModelArray object #' @keywords internal #' @export -setMethod("scalars", "ModelArray", function(x, ...) x@scalars) +setMethod("scalars", "ModelArray", function(x, ...) + x@scalars) #' Access the sources slot of a ModelArray object #' @@ -106,7 +111,8 @@ setMethod("scalars", "ModelArray", function(x, ...) x@scalars) #' @return The sources slot of the ModelArray object #' @keywords internal #' @export -setMethod("sources", "ModelArray", function(x) x@sources) +setMethod("sources", "ModelArray", function(x) + x@sources) @@ -125,10 +131,7 @@ ModelArraySeed <- function(filepath, name, type = NA) { # NOTE: the checker for if h5 groups fixels/voxels/scalars exist # (a.k.a valid fixel-wise data) is deleted, as ModelArray is generalized to any modality. - seed <- HDF5Array::HDF5ArraySeed( - filepath, - name = name, type = type - ) # HDF5Array is also from BioConductor... + seed <- HDF5Array::HDF5ArraySeed(filepath, name = name, type = type) # HDF5Array is also from BioConductor... seed } @@ -153,7 +156,9 @@ ModelArraySeed <- function(filepath, name, type = NA) { #' @importFrom dplyr %>% #' @importFrom DelayedArray DelayedArray realize #' @importFrom rhdf5 h5readAttributes -ModelArray <- function(filepath, scalar_types = c("FD"), analysis_names = c("myAnalysis")) { +ModelArray <- function(filepath, + scalar_types = c("FD"), + analysis_names = c("myAnalysis")) { # TODO: try and use hdf5r instead of rhdf5 and delayedarray here # fn.h5 <- H5File$new(filepath, mode="a") # open; "a": creates a new file or opens an existing one for read/write @@ -180,10 +185,7 @@ ModelArray <- function(filepath, scalar_types = c("FD"), analysis_names = c("myA ) %>% DelayedArray::DelayedArray() # load attribute "column_names", i.e. source filenames: - sources[[x]] <- rhdf5::h5readAttributes( - filepath, - name = sprintf("scalars/%s/values", scalar_types[x]) - )$column_names %>% as.character() + sources[[x]] <- rhdf5::h5readAttributes(filepath, name = sprintf("scalars/%s/values", scalar_types[x]))$column_names %>% as.character() # transpose scalar_data[[x]] if needed: if (dim(scalar_data[[x]])[2] == length(sources[[x]])) { @@ -216,7 +218,8 @@ ModelArray <- function(filepath, scalar_types = c("FD"), analysis_names = c("myA # message(flag_results_exist) if (flag_results_exist == FALSE) { results_data <- list() - } else { # results group exist --> to load subfolders + } else { + # results group exist --> to load subfolders results_data <- vector("list", length(analysis_names)) for (x in seq_along(analysis_names)) { @@ -226,12 +229,11 @@ ModelArray <- function(filepath, scalar_types = c("FD"), analysis_names = c("myA flag_analysis_exist <- flagAnalysisExistInh5(filepath, analysis_name = analysis_name) if (flag_analysis_exist == FALSE) { stop(paste0("This analysis: ", analysis_name, " does not exist...")) - } else { # exists + } else { + # exists # /results//has_names: - names_results_matrix <- rhdf5::h5readAttributes( - filepath, - name = sprintf("results/%s/results_matrix", analysis_name) - )$colnames # after updating writeResults() + names_results_matrix <- rhdf5::h5readAttributes(filepath, + name = sprintf("results/%s/results_matrix", analysis_name))$colnames # after updating writeResults() # names_results_matrix <- ModelArraySeed(filepath, name = sprintf( # "results/%s/has_names", analysis_name), type = NA) %>% @@ -252,9 +254,7 @@ ModelArray <- function(filepath, scalar_types = c("FD"), analysis_names = c("myA results_data[[x]]$results_matrix <- t(results_data[[x]]$results_matrix) } - colnames(results_data[[x]]$results_matrix) <- as.character( - DelayedArray::realize(names_results_matrix) - ) # designate the column names + colnames(results_data[[x]]$results_matrix) <- as.character(DelayedArray::realize(names_results_matrix)) # designate the column names # /results//lut_col?: # LOOP OVER # OF COL OF $RESULTS_MATRIX, AND SEE IF THERE IS LUT_COL @@ -321,7 +321,8 @@ ModelArray <- function(filepath, scalar_types = c("FD"), analysis_names = c("myA #' @return number of elements in ModelArray, for this specific scalar #' @export numElementsTotal <- function(modelarray, scalar_name = "FD") { - if (!(scalar_name %in% names(scalars(modelarray)))) { # not an existing scalar + if (!(scalar_name %in% names(scalars(modelarray)))) { + # not an existing scalar stop("scalar_name requested in not in modelarray! Please check out: scalars(modelarray)") } @@ -370,9 +371,14 @@ numElementsTotal <- function(modelarray, scalar_name = "FD") { #' @import tibble analyseOneElement.lm <- function(i_element, - formula, modelarray, phenotypes, scalar, - var.terms, var.model, - num.subj.lthr, num.stat.output = NULL, + formula, + modelarray, + phenotypes, + scalar, + var.terms, + var.model, + num.subj.lthr, + num.stat.output = NULL, flag_initiate = FALSE, on_error = "stop", ...) { @@ -410,7 +416,10 @@ analyseOneElement.lm <- function(i_element, onemodel <- tryCatch({ do.call(stats::lm, arguments_lm) }, error = function(e) { - msg <- paste0("analyseOneElement.lm error at element ", i_element, ": ", conditionMessage(e)) + msg <- paste0("analyseOneElement.lm error at element ", + i_element, + ": ", + conditionMessage(e)) if (on_error == "debug" && interactive()) { message(msg) browser() @@ -428,16 +437,10 @@ analyseOneElement.lm <- function(i_element, if (inherits(onemodel, "lm_error")) { if (flag_initiate == TRUE) { - toreturn <- list( - column_names = NaN, - list.terms = NaN - ) + toreturn <- list(column_names = NaN, list.terms = NaN) return(toreturn) } else { - onerow <- c( - i_element - 1, - rep(NaN, (num.stat.output - 1)) - ) + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) return(onerow) } } @@ -473,7 +476,8 @@ analyseOneElement.lm <- function(i_element, } # remove those columns: - if (length(var.terms.remove) != 0) { # if length=0, it's list(), nothing to remove + if (length(var.terms.remove) != 0) { + # if length=0, it's list(), nothing to remove onemodel.tidy <- dplyr::select(onemodel.tidy, -all_of(var.terms.remove)) } if (length(var.model.remove) != 0) { @@ -495,16 +499,19 @@ analyseOneElement.lm <- function(i_element, # all(union) = c(TRUE, FALSE, ...) temp <- union(temp_colnames, "term") # just an empty tibble (so below, all(dim(onemodel.tidy)) = FALSE) - if (all(temp == "term")) onemodel.tidy <- tibble() + if (all(temp == "term")) + onemodel.tidy <- tibble() temp_colnames <- onemodel.glance %>% colnames() temp <- union(temp_colnames, "term") # union of colnames and "term"; - if (all(temp == "term")) onemodel.glance <- tibble() + if (all(temp == "term")) + onemodel.glance <- tibble() # flatten .tidy results into one row: - if (all(dim(onemodel.tidy))) { # not empty | if any dim is 0, all=FALSE + if (all(dim(onemodel.tidy))) { + # not empty | if any dim is 0, all=FALSE onemodel.tidy.onerow <- onemodel.tidy %>% tidyr::pivot_wider( names_from = term, values_from = all_of(var.terms.orig), @@ -514,7 +521,8 @@ analyseOneElement.lm <- function(i_element, onemodel.tidy.onerow <- onemodel.tidy } - if (all(dim(onemodel.glance))) { # not empty + if (all(dim(onemodel.glance))) { + # not empty onemodel.glance.onerow <- onemodel.glance %>% tidyr::pivot_wider( names_from = term, values_from = all_of(var.model), @@ -530,38 +538,34 @@ analyseOneElement.lm <- function(i_element, # add a column of element ids: colnames.temp <- colnames(onemodel.onerow) - onemodel.onerow <- onemodel.onerow %>% tibble::add_column( - element_id = i_element - 1, .before = colnames.temp[1] - ) # add as the first column + onemodel.onerow <- onemodel.onerow %>% tibble::add_column(element_id = i_element - 1, .before = colnames.temp[1]) # add as the first column # now you can get the headers, # of columns, etc of the output results - if (flag_initiate == TRUE) { # return the column names: + if (flag_initiate == TRUE) { + # return the column names: # return: column_names <- colnames(onemodel.onerow) - toreturn <- list( - column_names = column_names, - list.terms = list.terms - ) + toreturn <- list(column_names = column_names, list.terms = list.terms) toreturn - } else if (flag_initiate == FALSE) { # return the one row results: + } else if (flag_initiate == FALSE) { + # return the one row results: # return: onerow <- as.numeric(onemodel.onerow) # change from tibble to numeric to save some space onerow } - } else { # if flag_sufficient==FALSE + } else { + # if flag_sufficient==FALSE if (flag_initiate == TRUE) { - toreturn <- list( - column_names = NaN, - list.terms = NaN - ) + toreturn <- list(column_names = NaN, list.terms = NaN) toreturn } else if (flag_initiate == FALSE) { onerow <- c( - i_element - 1, # first is element_id, which will still be a finite number + i_element - 1, + # first is element_id, which will still be a finite number rep(NaN, (num.stat.output - 1)) ) # other columns will be NaN @@ -613,10 +617,18 @@ analyseOneElement.lm <- function(i_element, #' @importFrom dplyr select %>% bind_cols #' @import tibble -analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, scalar, - var.smoothTerms, var.parametricTerms, var.model, - num.subj.lthr, num.stat.output = NULL, - flag_initiate = FALSE, flag_sse = FALSE, +analyseOneElement.gam <- function(i_element, + formula, + modelarray, + phenotypes, + scalar, + var.smoothTerms, + var.parametricTerms, + var.model, + num.subj.lthr, + num.stat.output = NULL, + flag_initiate = FALSE, + flag_sse = FALSE, on_error = "stop", ...) { values <- scalars(modelarray)[[scalar]][i_element, ] @@ -642,7 +654,10 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc onemodel <- tryCatch({ do.call(mgcv::gam, arguments) }, error = function(e) { - msg <- paste0("analyseOneElement.gam error at element ", i_element, ": ", conditionMessage(e)) + msg <- paste0("analyseOneElement.gam error at element ", + i_element, + ": ", + conditionMessage(e)) if (on_error == "debug" && interactive()) { message(msg) browser() @@ -668,10 +683,7 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc ) return(toreturn) } else { - onerow <- c( - i_element - 1, - rep(NaN, (num.stat.output - 1)) - ) + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) return(onerow) } } @@ -730,28 +742,35 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc } # remove those columns: - if (length(var.smoothTerms.remove) != 0) { # if length=0, it's list(), nothing to remove - onemodel.tidy.smoothTerms <- dplyr::select(onemodel.tidy.smoothTerms, -all_of(var.smoothTerms.remove)) + if (length(var.smoothTerms.remove) != 0) { + # if length=0, it's list(), nothing to remove + onemodel.tidy.smoothTerms <- dplyr::select(onemodel.tidy.smoothTerms, + -all_of(var.smoothTerms.remove)) } - if (length(var.parametricTerms.remove) != 0) { # if length=0, it's list(), nothing to remove - onemodel.tidy.parametricTerms <- dplyr::select(onemodel.tidy.parametricTerms, -all_of(var.parametricTerms.remove)) + if (length(var.parametricTerms.remove) != 0) { + # if length=0, it's list(), nothing to remove + onemodel.tidy.parametricTerms <- dplyr::select(onemodel.tidy.parametricTerms, + -all_of(var.parametricTerms.remove)) } if (length(var.model.remove) != 0) { onemodel.glance <- dplyr::select(onemodel.glance, -all_of(var.model.remove)) } # adjust: - if (num.smoothTerms > 0) { # if there is any smooth term + if (num.smoothTerms > 0) { + # if there is any smooth term onemodel.tidy.smoothTerms$term[onemodel.tidy.smoothTerms$term == "(Intercept)"] <- "Intercept" # change the term name from "(Intercept)" to "Intercept" } - if (nrow(onemodel.tidy.parametricTerms) > 0) { # if there is any parametric term + if (nrow(onemodel.tidy.parametricTerms) > 0) { + # if there is any parametric term onemodel.tidy.parametricTerms$term[onemodel.tidy.parametricTerms$term == "(Intercept)"] <- "Intercept" # change the term name from "(Intercept)" to "Intercept" } # change from s(x) to s_x: (could be s, te, etc); from s(x):oFactor to s_x_BYoFactor; from ti(x,z) to ti_x_z - if (num.smoothTerms > 0) { # if there is any smooth term + if (num.smoothTerms > 0) { + # if there is any smooth term for (i_row in seq_len(nrow(onemodel.tidy.smoothTerms))) { # step 1: change from s(x) to s_x term_name <- onemodel.tidy.smoothTerms$term[i_row] @@ -761,11 +780,9 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc smooth_name <- str_list[1] # "s" or some other smooth method type such as "te" str_valid <- paste0(smooth_name, "_", str) - if (length(str_list) > 2) { # there is string after variable name - str_valid <- paste0( - str_valid, "_", - paste(str_list[3:length(str_list)], collapse = "") - ) # combine rest of strings + if (length(str_list) > 2) { + # there is string after variable name + str_valid <- paste0(str_valid, "_", paste(str_list[3:length(str_list)], collapse = "")) # combine rest of strings } # detect ":", and change to "BY" # there is "_" replacing for ")" in "s()" already @@ -801,20 +818,24 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc # union of colnames and "term"; if colnames only has "term" or lengt of 0 (tibble()), # union = "term", all(union)=TRUE; otherwise, if there is colnames other than "term", # all(union) = c(TRUE, FALSE, ...) - if (all(temp == "term")) onemodel.tidy.smoothTerms <- tibble() + if (all(temp == "term")) + onemodel.tidy.smoothTerms <- tibble() # just an empty tibble (so below, all(dim(onemodel.tidy.smoothTerms)) = FALSE) temp_colnames <- onemodel.tidy.parametricTerms %>% colnames() temp <- union(temp_colnames, "term") - if (all(temp == "term")) onemodel.tidy.parametricTerms <- tibble() # just an empty tibble + if (all(temp == "term")) + onemodel.tidy.parametricTerms <- tibble() # just an empty tibble temp_colnames <- onemodel.glance %>% colnames() temp <- union(temp_colnames, "term") - if (all(temp == "term")) onemodel.glance <- tibble() # just an empty tibble + if (all(temp == "term")) + onemodel.glance <- tibble() # just an empty tibble # flatten .tidy results into one row: - if (all(dim(onemodel.tidy.smoothTerms))) { # not empty | if any dim is 0, all=FALSE + if (all(dim(onemodel.tidy.smoothTerms))) { + # not empty | if any dim is 0, all=FALSE onemodel.tidy.smoothTerms.onerow <- onemodel.tidy.smoothTerms %>% tidyr::pivot_wider( names_from = term, values_from = all_of(var.smoothTerms.orig), @@ -824,7 +845,8 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc onemodel.tidy.smoothTerms.onerow <- onemodel.tidy.smoothTerms } - if (all(dim(onemodel.tidy.parametricTerms))) { # not empty + if (all(dim(onemodel.tidy.parametricTerms))) { + # not empty onemodel.tidy.parametricTerms.onerow <- onemodel.tidy.parametricTerms %>% tidyr::pivot_wider( names_from = term, values_from = all_of(var.parametricTerms.orig), @@ -834,7 +856,8 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc onemodel.tidy.parametricTerms.onerow <- onemodel.tidy.parametricTerms } - if (all(dim(onemodel.glance))) { # not empty + if (all(dim(onemodel.glance))) { + # not empty onemodel.glance.onerow <- onemodel.glance %>% tidyr::pivot_wider( names_from = term, values_from = all_of(var.model), @@ -846,14 +869,9 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc # combine the tables: check if any of them is empty (tibble()) - onemodel.onerow <- bind_cols_check_emptyTibble( - onemodel.tidy.smoothTerms.onerow, - onemodel.tidy.parametricTerms.onerow - ) - onemodel.onerow <- bind_cols_check_emptyTibble( - onemodel.onerow, - onemodel.glance.onerow - ) + onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.tidy.smoothTerms.onerow, + onemodel.tidy.parametricTerms.onerow) + onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.onerow, onemodel.glance.onerow) # add a column of element ids: @@ -872,7 +890,8 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc # now you can get the headers, # of columns, etc of the output results - if (flag_initiate == TRUE) { # return the column names: + if (flag_initiate == TRUE) { + # return the column names: # return: column_names <- colnames(onemodel.onerow) @@ -883,13 +902,15 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc sp.criterion.attr.name = sp.criterion.attr.name ) toreturn - } else if (flag_initiate == FALSE) { # return the one row results: + } else if (flag_initiate == FALSE) { + # return the one row results: # return: onerow <- as.numeric(onemodel.onerow) # change from tibble to numeric to save some space onerow } - } else { # if flag_sufficient==FALSE + } else { + # if flag_sufficient==FALSE if (flag_initiate == TRUE) { toreturn <- list( column_names = NaN, @@ -900,7 +921,8 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc toreturn } else if (flag_initiate == FALSE) { onerow <- c( - i_element - 1, # first is element_id, which will still be a finite number + i_element - 1, + # first is element_id, which will still be a finite number rep(NaN, (num.stat.output - 1)) ) # other columns will be NaN @@ -945,8 +967,13 @@ analyseOneElement.gam <- function(i_element, formula, modelarray, phenotypes, sc #' @export #' @importFrom dplyr %>% #' @import tibble -analyseOneElement.wrap <- function(i_element, user_fun, modelarray, phenotypes, scalar, - num.subj.lthr, num.stat.output = NULL, +analyseOneElement.wrap <- function(i_element, + user_fun, + modelarray, + phenotypes, + scalar, + num.subj.lthr, + num.stat.output = NULL, flag_initiate = FALSE, on_error = "stop", ...) { @@ -972,7 +999,10 @@ analyseOneElement.wrap <- function(i_element, user_fun, modelarray, phenotypes, result <- tryCatch({ do.call(user_fun, arguments) }, error = function(e) { - msg <- paste0("analyseOneElement.wrap error at element ", i_element, ": ", conditionMessage(e)) + msg <- paste0("analyseOneElement.wrap error at element ", + i_element, + ": ", + conditionMessage(e)) if (on_error == "debug" && interactive()) { message(msg) browser() @@ -991,15 +1021,10 @@ analyseOneElement.wrap <- function(i_element, user_fun, modelarray, phenotypes, # On error in user function, return NaNs according to flag if (inherits(result, "wrap_error")) { if (flag_initiate == TRUE) { - toreturn <- list( - column_names = NaN - ) + toreturn <- list(column_names = NaN) return(toreturn) } else { - onerow <- c( - i_element - 1, - rep(NaN, (num.stat.output - 1)) - ) + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) return(onerow) } } @@ -1008,7 +1033,9 @@ analyseOneElement.wrap <- function(i_element, user_fun, modelarray, phenotypes, if ("data.frame" %in% class(result)) { onerow_tbl <- tibble::as_tibble(result) if (nrow(onerow_tbl) != 1) { - stop("The user function must return a one-row data.frame/tibble, a named list, or a named vector.") + stop( + "The user function must return a one-row data.frame/tibble, a named list, or a named vector." + ) } } else if (is.list(result)) { onerow_tbl <- tibble::as_tibble_row(result) @@ -1018,10 +1045,12 @@ analyseOneElement.wrap <- function(i_element, user_fun, modelarray, phenotypes, } onerow_tbl <- tibble::as_tibble_row(as.list(result)) } else { - stop(paste0( - "Unsupported return type from user function. ", - "Return a one-row data.frame/tibble, named list, or named vector." - )) + stop( + paste0( + "Unsupported return type from user function. ", + "Return a one-row data.frame/tibble, named list, or named vector." + ) + ) } # add element_id as first column @@ -1029,25 +1058,19 @@ analyseOneElement.wrap <- function(i_element, user_fun, modelarray, phenotypes, onerow_tbl <- onerow_tbl %>% tibble::add_column(element_id = i_element - 1, .before = colnames.temp[1]) if (flag_initiate == TRUE) { - toreturn <- list( - column_names = colnames(onerow_tbl) - ) + toreturn <- list(column_names = colnames(onerow_tbl)) toreturn } else { # return numeric vector to be consistent with other analysers as.numeric(onerow_tbl) } - } else { # insufficient subjects + } else { + # insufficient subjects if (flag_initiate == TRUE) { - toreturn <- list( - column_names = NaN - ) + toreturn <- list(column_names = NaN) toreturn } else { - onerow <- c( - i_element - 1, - rep(NaN, (num.stat.output - 1)) - ) + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) onerow } } @@ -1070,7 +1093,10 @@ analyseOneElement.wrap <- function(i_element, user_fun, modelarray, phenotypes, #' whether overwrite it (TRUE) or not (FALSE) #' @import hdf5r #' @export -writeResults <- function(fn.output, df.output, analysis_name = "myAnalysis", overwrite = TRUE) { +writeResults <- function(fn.output, + df.output, + analysis_name = "myAnalysis", + overwrite = TRUE) { # This is enhanced version with: 1) change to hdf5r; 2) write results with only one row for one element # check "df.output" @@ -1082,20 +1108,25 @@ writeResults <- function(fn.output, df.output, analysis_name = "myAnalysis", ove # open; "a": creates a new file or opens an existing one for read/write # check if group "results" already exists! - if (fn.output.h5$exists("results") == TRUE) { # group "results" exist + if (fn.output.h5$exists("results") == TRUE) { + # group "results" exist results.grp <- fn.output.h5$open("results") } else { results.grp <- fn.output.h5$create_group("results") } # check if group "results\" exists: - if (results.grp$exists(analysis_name) == TRUE && overwrite == FALSE) { + if (results.grp$exists(analysis_name) == TRUE && + overwrite == FALSE) { warning(paste0(analysis_name, " exists but not to overwrite!")) # TODO: add checker for exisiting analysis_name, esp the matrix size results.analysis.grp <- results.grp$open(analysis_name) results_matrix_ds <- results.analysis.grp[["results_matrix"]] - } else { # not exist; or exist && overwrite: to create - if (results.grp$exists(analysis_name) == TRUE && overwrite == TRUE) { # delete existing one first + } else { + # not exist; or exist && overwrite: to create + if (results.grp$exists(analysis_name) == TRUE && + overwrite == TRUE) { + # delete existing one first results.grp$link_delete(analysis_name) # NOTE: the file size will not shrink after your deletion.. # this is because of HDF5, regardless of package of hdf5r or rhdf5 @@ -1107,13 +1138,18 @@ writeResults <- function(fn.output, df.output, analysis_name = "myAnalysis", ove # create a subgroup called analysis_name under results.grp # check "df.output": make sure all columns are floats (i.e. numeric) - for (i_col in seq(1, ncol(df.output), by = 1)) { # for each column of df.output + for (i_col in seq(1, ncol(df.output), by = 1)) { + # for each column of df.output col_class <- as.character(sapply(df.output, class)[i_col]) # class of this column - if ((col_class != "numeric") && (col_class != "integer")) { # the column class is not numeric or integer + if ((col_class != "numeric") && + (col_class != "integer")) { + # the column class is not numeric or integer message( paste0( - "the column #", as.character(i_col), " of df.output to save: ", + "the column #", + as.character(i_col), + " of df.output to save: ", "data class is not numeric or integer...fixing it" ) ) From 777c59f49c51dac647140f2b775f1685a01259d9 Mon Sep 17 00:00:00 2001 From: mattcieslak Date: Mon, 15 Sep 2025 16:44:07 -0400 Subject: [PATCH 3/9] lint constructor --- R/ModelArray_Constructor.R | 206 +++++++++++++++++++++---------------- 1 file changed, 120 insertions(+), 86 deletions(-) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index d4aa652..72bee5f 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -32,8 +32,9 @@ ModelArray <- setClass( #' @return The results slot of the object #' @aliases results #' @keywords internal -setGeneric("results", function(x, ...) - standardGeneric("results")) +setGeneric("results", function(x, ...) { + standardGeneric("results") +}) #' Access the scalars slot of an object #' @@ -48,8 +49,9 @@ setGeneric("results", function(x, ...) #' @return The scalars slot of the object #' @aliases scalars #' @keywords internal -setGeneric("scalars", function(x, ...) - standardGeneric("scalars")) +setGeneric("scalars", function(x, ...) { + standardGeneric("scalars") +}) #' Access the sources slot of an object #' @@ -63,8 +65,9 @@ setGeneric("scalars", function(x, ...) #' @return The sources slot of the object #' @aliases sources #' @keywords internal -setGeneric("sources", function(x) - standardGeneric("sources")) +setGeneric("sources", function(x) { + standardGeneric("sources") +}) #' Access the results slot of a ModelArray object @@ -80,8 +83,9 @@ setGeneric("sources", function(x) #' @return The results slot of the ModelArray object #' @keywords internal #' @export -setMethod("results", "ModelArray", function(x, ...) - x@results) +setMethod("results", "ModelArray", function(x, ...) { + x@results +}) #' Access the scalars slot of a ModelArray object #' @@ -96,8 +100,9 @@ setMethod("results", "ModelArray", function(x, ...) #' @return The scalars slot of the ModelArray object #' @keywords internal #' @export -setMethod("scalars", "ModelArray", function(x, ...) - x@scalars) +setMethod("scalars", "ModelArray", function(x, ...) { + x@scalars +}) #' Access the sources slot of a ModelArray object #' @@ -111,8 +116,9 @@ setMethod("scalars", "ModelArray", function(x, ...) #' @return The sources slot of the ModelArray object #' @keywords internal #' @export -setMethod("sources", "ModelArray", function(x) - x@sources) +setMethod("sources", "ModelArray", function(x) { + x@sources +}) @@ -137,6 +143,7 @@ ModelArraySeed <- function(filepath, name, type = NA) { } + #' Load element-wise data from .h5 file as an ModelArray object #' #' @details: @@ -233,7 +240,8 @@ ModelArray <- function(filepath, # exists # /results//has_names: names_results_matrix <- rhdf5::h5readAttributes(filepath, - name = sprintf("results/%s/results_matrix", analysis_name))$colnames # after updating writeResults() + name = sprintf("results/%s/results_matrix", analysis_name) + )$colnames # after updating writeResults() # names_results_matrix <- ModelArraySeed(filepath, name = sprintf( # "results/%s/has_names", analysis_name), type = NA) %>% @@ -413,27 +421,32 @@ analyseOneElement.lm <- function(i_element, # onemodel <- stats::lm(formula, data = dat, ...) # onemodel <- stats::lm(formula, data = dat, weights = myWeights,...) - onemodel <- tryCatch({ - do.call(stats::lm, arguments_lm) - }, error = function(e) { - msg <- paste0("analyseOneElement.lm error at element ", - i_element, - ": ", - conditionMessage(e)) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.lm_error_initiate = TRUE), class = "lm_error")) - } else { - return(structure(list(.lm_error_runtime = TRUE), class = "lm_error")) + onemodel <- tryCatch( + { + do.call(stats::lm, arguments_lm) + }, + error = function(e) { + msg <- paste0( + "analyseOneElement.lm error at element ", + i_element, + ": ", + conditionMessage(e) + ) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.lm_error_initiate = TRUE), class = "lm_error")) + } else { + return(structure(list(.lm_error_runtime = TRUE), class = "lm_error")) + } } + stop(e) } - stop(e) - }) + ) if (inherits(onemodel, "lm_error")) { if (flag_initiate == TRUE) { @@ -499,13 +512,15 @@ analyseOneElement.lm <- function(i_element, # all(union) = c(TRUE, FALSE, ...) temp <- union(temp_colnames, "term") # just an empty tibble (so below, all(dim(onemodel.tidy)) = FALSE) - if (all(temp == "term")) + if (all(temp == "term")) { onemodel.tidy <- tibble() + } temp_colnames <- onemodel.glance %>% colnames() temp <- union(temp_colnames, "term") # union of colnames and "term"; - if (all(temp == "term")) + if (all(temp == "term")) { onemodel.glance <- tibble() + } @@ -651,27 +666,32 @@ analyseOneElement.gam <- function(i_element, arguments$data <- dat # explicitly passing arguments into command, to avoid error of argument "weights" - onemodel <- tryCatch({ - do.call(mgcv::gam, arguments) - }, error = function(e) { - msg <- paste0("analyseOneElement.gam error at element ", - i_element, - ": ", - conditionMessage(e)) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.gam_error_initiate = TRUE), class = "gam_error")) - } else { - return(structure(list(.gam_error_runtime = TRUE), class = "gam_error")) + onemodel <- tryCatch( + { + do.call(mgcv::gam, arguments) + }, + error = function(e) { + msg <- paste0( + "analyseOneElement.gam error at element ", + i_element, + ": ", + conditionMessage(e) + ) + if (on_error == "debug" && interactive()) { + message(msg) + browser() } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.gam_error_initiate = TRUE), class = "gam_error")) + } else { + return(structure(list(.gam_error_runtime = TRUE), class = "gam_error")) + } + } + stop(e) } - stop(e) - }) + ) if (inherits(onemodel, "gam_error")) { if (flag_initiate == TRUE) { @@ -744,13 +764,17 @@ analyseOneElement.gam <- function(i_element, # remove those columns: if (length(var.smoothTerms.remove) != 0) { # if length=0, it's list(), nothing to remove - onemodel.tidy.smoothTerms <- dplyr::select(onemodel.tidy.smoothTerms, - -all_of(var.smoothTerms.remove)) + onemodel.tidy.smoothTerms <- dplyr::select( + onemodel.tidy.smoothTerms, + -all_of(var.smoothTerms.remove) + ) } if (length(var.parametricTerms.remove) != 0) { # if length=0, it's list(), nothing to remove - onemodel.tidy.parametricTerms <- dplyr::select(onemodel.tidy.parametricTerms, - -all_of(var.parametricTerms.remove)) + onemodel.tidy.parametricTerms <- dplyr::select( + onemodel.tidy.parametricTerms, + -all_of(var.parametricTerms.remove) + ) } if (length(var.model.remove) != 0) { onemodel.glance <- dplyr::select(onemodel.glance, -all_of(var.model.remove)) @@ -818,19 +842,22 @@ analyseOneElement.gam <- function(i_element, # union of colnames and "term"; if colnames only has "term" or lengt of 0 (tibble()), # union = "term", all(union)=TRUE; otherwise, if there is colnames other than "term", # all(union) = c(TRUE, FALSE, ...) - if (all(temp == "term")) + if (all(temp == "term")) { onemodel.tidy.smoothTerms <- tibble() + } # just an empty tibble (so below, all(dim(onemodel.tidy.smoothTerms)) = FALSE) temp_colnames <- onemodel.tidy.parametricTerms %>% colnames() temp <- union(temp_colnames, "term") - if (all(temp == "term")) - onemodel.tidy.parametricTerms <- tibble() # just an empty tibble + if (all(temp == "term")) { + onemodel.tidy.parametricTerms <- tibble() + } # just an empty tibble temp_colnames <- onemodel.glance %>% colnames() temp <- union(temp_colnames, "term") - if (all(temp == "term")) - onemodel.glance <- tibble() # just an empty tibble + if (all(temp == "term")) { + onemodel.glance <- tibble() + } # just an empty tibble # flatten .tidy results into one row: @@ -869,8 +896,10 @@ analyseOneElement.gam <- function(i_element, # combine the tables: check if any of them is empty (tibble()) - onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.tidy.smoothTerms.onerow, - onemodel.tidy.parametricTerms.onerow) + onemodel.onerow <- bind_cols_check_emptyTibble( + onemodel.tidy.smoothTerms.onerow, + onemodel.tidy.parametricTerms.onerow + ) onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.onerow, onemodel.glance.onerow) @@ -996,27 +1025,32 @@ analyseOneElement.wrap <- function(i_element, arguments$data <- dat # Execute user function with error handling - result <- tryCatch({ - do.call(user_fun, arguments) - }, error = function(e) { - msg <- paste0("analyseOneElement.wrap error at element ", - i_element, - ": ", - conditionMessage(e)) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.wrap_error_initiate = TRUE), class = "wrap_error")) - } else { - return(structure(list(.wrap_error_runtime = TRUE), class = "wrap_error")) + result <- tryCatch( + { + do.call(user_fun, arguments) + }, + error = function(e) { + msg <- paste0( + "analyseOneElement.wrap error at element ", + i_element, + ": ", + conditionMessage(e) + ) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.wrap_error_initiate = TRUE), class = "wrap_error")) + } else { + return(structure(list(.wrap_error_runtime = TRUE), class = "wrap_error")) + } } + stop(e) } - stop(e) - }) + ) # On error in user function, return NaNs according to flag if (inherits(result, "wrap_error")) { @@ -1117,7 +1151,7 @@ writeResults <- function(fn.output, # check if group "results\" exists: if (results.grp$exists(analysis_name) == TRUE && - overwrite == FALSE) { + overwrite == FALSE) { warning(paste0(analysis_name, " exists but not to overwrite!")) # TODO: add checker for exisiting analysis_name, esp the matrix size results.analysis.grp <- results.grp$open(analysis_name) @@ -1125,7 +1159,7 @@ writeResults <- function(fn.output, } else { # not exist; or exist && overwrite: to create if (results.grp$exists(analysis_name) == TRUE && - overwrite == TRUE) { + overwrite == TRUE) { # delete existing one first results.grp$link_delete(analysis_name) # NOTE: the file size will not shrink after your deletion.. @@ -1143,7 +1177,7 @@ writeResults <- function(fn.output, col_class <- as.character(sapply(df.output, class)[i_col]) # class of this column if ((col_class != "numeric") && - (col_class != "integer")) { + (col_class != "integer")) { # the column class is not numeric or integer message( paste0( From ac79851e3b92107ba6a3c17fb009fc5481825097 Mon Sep 17 00:00:00 2001 From: mattcieslak Date: Tue, 16 Sep 2025 21:14:51 -0400 Subject: [PATCH 4/9] add example df --- NAMESPACE | 4 +- R/ModelArray_Constructor.R | 154 ++++++++++++++++--------------------- R/ModelArray_S4Methods.R | 47 +++++++++++ R/analyse.R | 3 +- vignettes/walkthrough.Rmd | 82 +++++++++++++++----- vignettes/wrap.Rmd | 11 ++- 6 files changed, 187 insertions(+), 114 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 58316e9..e6d5bf3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,10 +11,8 @@ export(analyseOneElement.wrap) export(gen_gamFormula_contIx) export(gen_gamFormula_fxSmooth) export(numElementsTotal) -export(results) -export(scalars) -export(sources) export(writeResults) +exportMethods(exampleElementData) exportMethods(results) exportMethods(scalars) exportMethods(show) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index 72bee5f..e817eae 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -240,8 +240,7 @@ ModelArray <- function(filepath, # exists # /results//has_names: names_results_matrix <- rhdf5::h5readAttributes(filepath, - name = sprintf("results/%s/results_matrix", analysis_name) - )$colnames # after updating writeResults() + name = sprintf("results/%s/results_matrix", analysis_name))$colnames # after updating writeResults() # names_results_matrix <- ModelArraySeed(filepath, name = sprintf( # "results/%s/has_names", analysis_name), type = NA) %>% @@ -421,32 +420,27 @@ analyseOneElement.lm <- function(i_element, # onemodel <- stats::lm(formula, data = dat, ...) # onemodel <- stats::lm(formula, data = dat, weights = myWeights,...) - onemodel <- tryCatch( - { - do.call(stats::lm, arguments_lm) - }, - error = function(e) { - msg <- paste0( - "analyseOneElement.lm error at element ", - i_element, - ": ", - conditionMessage(e) - ) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.lm_error_initiate = TRUE), class = "lm_error")) - } else { - return(structure(list(.lm_error_runtime = TRUE), class = "lm_error")) - } + onemodel <- tryCatch({ + do.call(stats::lm, arguments_lm) + }, error = function(e) { + msg <- paste0("analyseOneElement.lm error at element ", + i_element, + ": ", + conditionMessage(e)) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.lm_error_initiate = TRUE), class = "lm_error")) + } else { + return(structure(list(.lm_error_runtime = TRUE), class = "lm_error")) } - stop(e) } - ) + stop(e) + }) if (inherits(onemodel, "lm_error")) { if (flag_initiate == TRUE) { @@ -666,32 +660,27 @@ analyseOneElement.gam <- function(i_element, arguments$data <- dat # explicitly passing arguments into command, to avoid error of argument "weights" - onemodel <- tryCatch( - { - do.call(mgcv::gam, arguments) - }, - error = function(e) { - msg <- paste0( - "analyseOneElement.gam error at element ", - i_element, - ": ", - conditionMessage(e) - ) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.gam_error_initiate = TRUE), class = "gam_error")) - } else { - return(structure(list(.gam_error_runtime = TRUE), class = "gam_error")) - } + onemodel <- tryCatch({ + do.call(mgcv::gam, arguments) + }, error = function(e) { + msg <- paste0("analyseOneElement.gam error at element ", + i_element, + ": ", + conditionMessage(e)) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.gam_error_initiate = TRUE), class = "gam_error")) + } else { + return(structure(list(.gam_error_runtime = TRUE), class = "gam_error")) } - stop(e) } - ) + stop(e) + }) if (inherits(onemodel, "gam_error")) { if (flag_initiate == TRUE) { @@ -764,17 +753,11 @@ analyseOneElement.gam <- function(i_element, # remove those columns: if (length(var.smoothTerms.remove) != 0) { # if length=0, it's list(), nothing to remove - onemodel.tidy.smoothTerms <- dplyr::select( - onemodel.tidy.smoothTerms, - -all_of(var.smoothTerms.remove) - ) + onemodel.tidy.smoothTerms <- dplyr::select(onemodel.tidy.smoothTerms,-all_of(var.smoothTerms.remove)) } if (length(var.parametricTerms.remove) != 0) { # if length=0, it's list(), nothing to remove - onemodel.tidy.parametricTerms <- dplyr::select( - onemodel.tidy.parametricTerms, - -all_of(var.parametricTerms.remove) - ) + onemodel.tidy.parametricTerms <- dplyr::select(onemodel.tidy.parametricTerms,-all_of(var.parametricTerms.remove)) } if (length(var.model.remove) != 0) { onemodel.glance <- dplyr::select(onemodel.glance, -all_of(var.model.remove)) @@ -896,10 +879,8 @@ analyseOneElement.gam <- function(i_element, # combine the tables: check if any of them is empty (tibble()) - onemodel.onerow <- bind_cols_check_emptyTibble( - onemodel.tidy.smoothTerms.onerow, - onemodel.tidy.parametricTerms.onerow - ) + onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.tidy.smoothTerms.onerow, + onemodel.tidy.parametricTerms.onerow) onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.onerow, onemodel.glance.onerow) @@ -1025,32 +1006,27 @@ analyseOneElement.wrap <- function(i_element, arguments$data <- dat # Execute user function with error handling - result <- tryCatch( - { - do.call(user_fun, arguments) - }, - error = function(e) { - msg <- paste0( - "analyseOneElement.wrap error at element ", - i_element, - ": ", - conditionMessage(e) - ) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.wrap_error_initiate = TRUE), class = "wrap_error")) - } else { - return(structure(list(.wrap_error_runtime = TRUE), class = "wrap_error")) - } + result <- tryCatch({ + do.call(user_fun, arguments) + }, error = function(e) { + msg <- paste0("analyseOneElement.wrap error at element ", + i_element, + ": ", + conditionMessage(e)) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.wrap_error_initiate = TRUE), class = "wrap_error")) + } else { + return(structure(list(.wrap_error_runtime = TRUE), class = "wrap_error")) } - stop(e) } - ) + stop(e) + }) # On error in user function, return NaNs according to flag if (inherits(result, "wrap_error")) { @@ -1151,7 +1127,7 @@ writeResults <- function(fn.output, # check if group "results\" exists: if (results.grp$exists(analysis_name) == TRUE && - overwrite == FALSE) { + overwrite == FALSE) { warning(paste0(analysis_name, " exists but not to overwrite!")) # TODO: add checker for exisiting analysis_name, esp the matrix size results.analysis.grp <- results.grp$open(analysis_name) @@ -1159,7 +1135,7 @@ writeResults <- function(fn.output, } else { # not exist; or exist && overwrite: to create if (results.grp$exists(analysis_name) == TRUE && - overwrite == TRUE) { + overwrite == TRUE) { # delete existing one first results.grp$link_delete(analysis_name) # NOTE: the file size will not shrink after your deletion.. @@ -1177,7 +1153,7 @@ writeResults <- function(fn.output, col_class <- as.character(sapply(df.output, class)[i_col]) # class of this column if ((col_class != "numeric") && - (col_class != "integer")) { + (col_class != "integer")) { # the column class is not numeric or integer message( paste0( diff --git a/R/ModelArray_S4Methods.R b/R/ModelArray_S4Methods.R index be68a33..b95e478 100644 --- a/R/ModelArray_S4Methods.R +++ b/R/ModelArray_S4Methods.R @@ -112,6 +112,53 @@ setMethod( # ) # ----------------above works. +### Example per-element data helper ##### + +#' @aliases exampleElementData +setGeneric("exampleElementData", function(x, ...) standardGeneric("exampleElementData")) + +#' Example per-element data.frame for user functions +#' +#' @description +#' Returns a copy of `phenotypes` with an extra column named by `scalar` populated +#' with the selected element's values from the `ModelArray`. This mirrors the +#' per-element data that `ModelArray.wrap` passes to user functions (`data = dat`). +#' +#' @param x An ModelArray object +#' @param scalar A character. The name of the element-wise scalar to append +#' @param i_element An integer, the i_th element (1-based) +#' @param phenotypes A data.frame of the cohort with independent variables/covariates +#' @return A data.frame with the additional response column named by `scalar` +#' @examples +#' \dontrun{ +#' h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray") +#' csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray") +#' ma <- ModelArray(h5_path, scalar_types = c("FD")) +#' phen <- read.csv(csv_path) +#' df <- exampleElementData(ma, scalar = "FD", i_element = 1, phenotypes = phen) +#' } +#' @export +setMethod( + "exampleElementData", + "ModelArray", + function(x, scalar = "FD", i_element = 1L, phenotypes) { + if (!is.data.frame(phenotypes)) { + stop("phenotypes must be a data.frame") + } + if (!(scalar %in% names(scalars(x)))) { + stop("scalar not found in modelarray; use one of names(scalars(x))") + } + num_elements <- nrow(scalars(x)[[scalar]]) + if (length(i_element) != 1L || is.na(i_element) || i_element < 1L || i_element > num_elements) { + stop("i_element is out of range") + } + + dat <- phenotypes + dat[[scalar]] <- scalars(x)[[scalar]][i_element, ] + dat + } +) + # # NOTE: ref: https://stackoverflow.com/questions/56560280/ # can-i-define-s4-methods-that-dispatch-on-more-than-one-argument-from-an-s3-gener # setGeneric("lm", function(formula, fixelarray, phenotypes, scalar, idx, ...) standardGeneric("lm"), diff --git a/R/analyse.R b/R/analyse.R index a7b1abb..c5ac558 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -670,7 +670,8 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N correct.p.value.smoothTerms = c("fdr"), correct.p.value.parametricTerms = c("fdr"), num.subj.lthr.abs = 10, num.subj.lthr.rel = 0.2, - verbose = TRUE, pbar = TRUE, n_cores = 1, ...) { + verbose = TRUE, pbar = TRUE, n_cores = 1, + on_error = "stop", ...) { # data type assertions if (class(data) != "ModelArray") { stop("data's class is not ModelArray!") diff --git a/vignettes/walkthrough.Rmd b/vignettes/walkthrough.Rmd index d576d32..1304650 100644 --- a/vignettes/walkthrough.Rmd +++ b/vignettes/walkthrough.Rmd @@ -9,11 +9,19 @@ vignette: > ## Introduction -In this example walkthrough, we will use some example **fixel**-wise data to demonstrate the steps of using `ModelArray` and its companion python package, `ConFixel`. Analysis for **voxel**-wise data follows similar steps, but there are several differences when execution; for more details regarding application to voxel-wise data, please refer to `vignette("voxel-wise_data")` page. We also provide hints for voxel-wise data throughout current walkthrough. +In this example walkthrough, we will use some example **fixel**-wise data to demonstrate the steps of using `ModelArray` and its companion python package, `ConFixel`. +Analysis for **voxel**-wise data follows similar steps, but there are several differences when execution; +for more details regarding application to voxel-wise data, please refer to `vignette("voxel-wise_data")` page. +We also provide hints for voxel-wise data throughout current walkthrough. -By following the `vignette("installations")` page, to perform statistical analysis for fixel-wise data, you should have successfully installed `ModelArray`, `ConFixel`, and `MRtrix`. We expect that `ConFixel` has been installed in a conda environment called `modelarray`. +By following the `vignette("installations")` page, +to perform statistical analysis for fixel-wise data, +you should have successfully installed `ModelArray`, `ConFixel`, and `MRtrix`. +We expect that `ConFixel` has been installed in a conda environment called `modelarray`. -We will first prepare the data and convert it into the format that `ModelArray` requires (Step 1), then we'll use `ModelArray` to perform the statistical analysis (Step 2). Finally we will convert the results into original file format and view them (Step 3). +We will first prepare the data and convert it into the format that `ModelArray` requires (Step 1), +then we'll use `ModelArray` to perform the statistical analysis (Step 2). +Finally we will convert the results into original file format and view them (Step 3). ## Step 1. Prepare your data @@ -27,7 +35,10 @@ $ pwd # print the full path of this folder ``` On a linux machine, you'll see the printed full path of this folder is `/home//Desktop/myProject` -Here, we provide some demo fixel-wise data. This demo data includes 100 subjects aged 8-22 years from the Philadelphia Neurodevelopmental Cohort (PNC) [Satterthwaite et al., 2014](https://doi.org/10.1016/j.neuroimage.2013.07.064). This data was generated by following fixel-based analysis [Raffelt et al., 2017](https://doi.org/10.1016/j.neuroimage.2016.09.029) and are ready for fixel-wise statistical analysis. You can get the data by running the following: +Here, we provide some demo fixel-wise data. +This demo data includes 100 subjects aged 8-22 years from the Philadelphia Neurodevelopmental Cohort (PNC) [Satterthwaite et al., 2014](https://doi.org/10.1016/j.neuroimage.2013.07.064). +This data was generated by following fixel-based analysis [Raffelt et al., 2017](https://doi.org/10.1016/j.neuroimage.2016.09.029) and are ready for fixel-wise statistical analysis. +You can get the data by running the following: ``` {.console} $ wget -cO - https://osf.io/tce9d/download > download.tar.gz @@ -55,11 +66,16 @@ The data is organized in the following way: ``` -In this demo data, the fixel-wise metric is `FDC`. As you can see here, in folder `FDC`, besides subject-level fixel-wise data in template space (e.g. `sub-010b693.mif`), there are also the files `index.mif` and `directions.mif`. These two files provide important information of the fixel locations - you can learn about their definitions [here](https://userdocs.mrtrix.org/en/dev/fixel_based_analysis/fixel_directory_format.html). +In this demo data, the fixel-wise metric is `FDC`. +As you can see here, in folder `FDC`, besides subject-level fixel-wise data in template space (e.g. `sub-010b693.mif`), there are also the files `index.mif` and `directions.mif`. +These two files provide important information of the fixel locations - +you can learn about their definitions [here](https://userdocs.mrtrix.org/en/dev/fixel_based_analysis/fixel_directory_format.html). ### Step 1.2. Prepare a CSV file of cohort phenotypes -In addition to fixel-wise data, we also need a CSV file of cohort phenotypes. This file will be used by both `ConFixel` and `ModelArray`. Here's an example: `cohort_FDC_n100.csv`: +In addition to fixel-wise data, we also need a CSV file of cohort phenotypes. +This file will be used by both `ConFixel` and `ModelArray`. +Here's an example: `cohort_FDC_n100.csv`: | subject_id | Age | sex | dti64MeanRelRMS | ***scalar_name*** | ***source_file*** | |:-----------:|:----:|:---:|:---------------:|:-----------:|:-------------------:| @@ -76,7 +92,11 @@ The table in this CSV file includes these columns: ### Step 1.3. Convert data into an HDF5 file using ConFixel -One of the reasons why `ModelArray` is memory efficient is it takes advantages of Hierarchical Data Format 5 (HDF5) file format. The extension of this file format is `.h5`. In short, an HDF5 file stores large datasets hierarchically. Let's use `ConFixel` to convert the list of mif files into an HDF5 file. In the terminal console: +One of the reasons why `ModelArray` is memory efficient is it takes advantages of Hierarchical Data Format 5 (HDF5) file format. +The extension of this file format is `.h5`. +In short, an HDF5 file stores large datasets hierarchically. +Let's use `ConFixel` to convert the list of mif files into an HDF5 file. +In the terminal console: ``` {.console} $ conda activate modelarray @@ -91,7 +111,8 @@ $ confixel \ # we recommend it's a full path too! ``` -As mentioned before, here we take `/home//Desktop/myProject` as the main folder; you won't need to repeat paths once you've used the `--relative-root` flag. +As mentioned before, here we take `/home//Desktop/myProject` as the main folder; +you won't need to repeat paths once you've used the `--relative-root` flag. When running `confixel`, you will see a moving progress bar. When it finishes, it looks like this: @@ -104,7 +125,9 @@ Now you got the converted HDF5 file `demo_FDC_n100.h5` in folder `~/Desktop/myPr ## Step 2. Use ModelArray to perform statistical analysis -The next step is to use this HDF5 file and the CSV file we prepared to perform statistical analysis in R. If you installed RStudio (which we recommend), then you can simply launch RStudio. All the commands in this Step 2 section will be run in R. +The next step is to use this HDF5 file and the CSV file we prepared to perform statistical analysis in R. +If you installed RStudio (which we recommend), then you can simply launch RStudio. +All the commands in this Step 2 section will be run in R. > Hint for voxel-wise data for Step 2: You can follow the same steps here. Note that each "element" is a *voxel* now, instead of a *fixel*. Make sure you replace the scalar name, covariates, file paths etc with yours. In addition, as voxel-wise data may have subject-specific masks, you can also tailor the lower threshold of number of subjects when applying `ModelArray.lm()` and `ModelArray.gam()`. See `vignette("voxel-wise_data")` page for more. @@ -128,6 +151,7 @@ modelarray <- ModelArray(h5_path, scalar_types = c("FDC")) # let's check what's in it: modelarray ``` + You'll see: ```{.console} ModelArray located at ~/Desktop/myProject/demo_FDC_n100.h5 @@ -176,9 +200,16 @@ phenotypes <- read.csv(csv_path) As this CSV file already provides sufficient covariates ready for analysis below, we don't need to do extra work on it. ### Step 2.4. Perform statistical analysis -With both `modelarray` and data frame `phenotypes` set up, we can now perform the statistical analysis. At present, `ModelArray` supports linear models as well as generalized additive models (GAM) with and without penalized splines, which are particularly useful for studying nonlinear effects in lifespan data. +With both `modelarray` and data frame `phenotypes` set up, we can now perform the statistical analysis. +At present, `ModelArray` supports linear models as well as generalized additive models (GAM) with and without penalized splines, +which are particularly useful for studying nonlinear effects in lifespan data. -Let's start with linear model `ModelArray.lm()`. We first need a formula defining the model. We mainly want to test how fixel FDC changes with age in adolescence, assuming it's a linear relationship. We also add sex and in-scanner motion quantification `dti64MeanRelRMS` as covariates. Intercept will be automatically added. We first try it out on first 100 fixels before we running on all fixels: +Let's start with linear model `ModelArray.lm()`. +We first need a formula defining the model. +We mainly want to test how fixel FDC changes with age in adolescence, assuming it's a linear relationship. +We also add sex and in-scanner motion quantification `dti64MeanRelRMS` as covariates. +Intercept will be automatically added. +We first try it out on first 100 fixels before we running on all fixels: ```{r example lm, eval=FALSE} # formula: @@ -207,7 +238,8 @@ looping across elements.... |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s ``` -It first printed how several options for `lm` were set, then worked on the model fitting. Let's check the first 6 rows of the results, i.e. results of the first 6 fixels: +It first printed how several options for `lm` were set, then worked on the model fitting. +Let's check the first 6 rows of the results, i.e. results of the first 6 fixels: ```{r, eval=FALSE} head(mylm.try) ``` @@ -237,7 +269,9 @@ As you can see, for each fixel, by default, `ModelArray.lm()` returns: You may request more comprehensive statistics with the argument `full.outputs=TRUE`. -Now let's try out `ModelArray.gam()`. The example commands are as below. Compared to a linear model, GAMs flexibly model linear and nonlinear effects by using smooth functions; penalty can also be applied to avoid over-fitting. +Now let's try out `ModelArray.gam()`. +The example commands are as below. Compared to a linear model, +GAMs flexibly model linear and nonlinear effects by using smooth functions; penalty can also be applied to avoid over-fitting. Here we use `s(Age)` to indicate that this will be fit as a spline smooth function. The `k=4` argument sets an upper limit on the degrees of freedom for this smooth function. Restricted maximum likelihood (`REML`) is used for fitting. @@ -295,9 +329,13 @@ For more options, please view pages for `ModelArray.lm()` and `ModelArray.gam()` ### Step 2.5. A full run and saving the results -Previous examples only ran on a small subset of the fixels. Now we'll formally run across all fixels and save the results. Because running all fixels will take a substantial amount of time, here we only run the linear model. +Previous examples only ran on a small subset of the fixels. +Now we'll formally run across all fixels and save the results. +Because running all fixels will take a substantial amount of time, here we only run the linear model. -Notice that the command below uses the default value of `element.subset=NULL` so that all fixels will be analyzed. Also notice that, to speed up, we're requesting 4 CPU cores to run in parallel. You may adjust this number based on how many CPU cores you have on your machine. On a Linux machine with Intel(R) Xeon(R) 10th Gen CPU @ 2.81GHz and using 4 CPU cores, it takes around 2.5 hours to finish. +Notice that the command below uses the default value of `element.subset=NULL` so that all fixels will be analyzed. +Also notice that, to speed up, we're requesting 4 CPU cores to run in parallel. You may adjust this number based on how many CPU cores you have on your machine. +On a Linux machine with Intel(R) Xeon(R) 10th Gen CPU @ 2.81GHz and using 4 CPU cores, it takes around 2.5 hours to finish. ```{r, eval=FALSE} # run linear model fitting with ModelArray.lm() on the all fixels: @@ -312,7 +350,8 @@ We now save the results data frame into the original h5 file: writeResults(h5_path, df.output = mylm, analysis_name = "results_lm") ``` -Notice that the the analysis name specified in argument `analysis_name` will be used in `ConFixel` in the next step when converting results back to fixel mif file format. It'll also be used as the prefix of the mif files to be saved. +Notice that the the analysis name specified in argument `analysis_name` will be used in `ConFixel` in the next step when converting results back to fixel mif file format. +It'll also be used as the prefix of the mif files to be saved. We can even check out the saved results in the h5 file (this is optional): @@ -403,7 +442,9 @@ $ cd results_lm # switch to results folder $ mrview ``` -Click File -> Open, select `index.mif`. Then click Tools -> Fixel plot, you'll see a side panel of "Fixel plot". Within this side panel, click button "Open fixel image" (see below, highlighted in red): +Click File -> Open, select `index.mif`. +Then click Tools -> Fixel plot, you'll see a side panel of "Fixel plot". +Within this side panel, click button "Open fixel image" (see below, highlighted in red):
@@ -411,7 +452,12 @@ Click File -> Open, select `index.mif`. Then click Tools -> Fixel plot, you'll s
-From there, select `index.mif` file again. You'll see colored `directions.mif`. Now you can choose which image to display. Click the filename next to "colour by", select `results_lm_Age.estimate.mif`; then click the button next to "threshold by", select `results_lm_model.p.value.fdr.mif`, check/tick the option of upper limit, then enter "0.005". You may change the view by clicking "View" in the upper panel. Below is an example view: +From there, select `index.mif` file again. +You'll see colored `directions.mif`. +Now you can choose which image to display. +Click the filename next to "colour by", select `results_lm_Age.estimate.mif`; +then click the button next to "threshold by", select `results_lm_model.p.value.fdr.mif`, check/tick the option of upper limit, then enter "0.005". +You may change the view by clicking "View" in the upper panel. Below is an example view:
diff --git a/vignettes/wrap.Rmd b/vignettes/wrap.Rmd index 6400e2f..8d1a61c 100644 --- a/vignettes/wrap.Rmd +++ b/vignettes/wrap.Rmd @@ -11,13 +11,15 @@ vignette: > `ModelArray.wrap` lets you run your own per-element analysis function over all elements in a `ModelArray`, reusing the same alignment, subject-threshold checks, and parallel looping machinery as `ModelArray.lm` and `ModelArray.gam`. -- You provide a function `FUN` that operates on a single element by receiving a `data` frame (the cohort `phenotypes` with an added response column named by `scalar`). +- You provide a function `FUN` that operates on a single element by receiving a `dataframe` (the cohort `phenotypes` with an added response column named by `scalar`). - `FUN` should return a one-row data.frame/tibble, a named list, or a named vector. The column names from the first successful element define the output schema. - The result is a combined data.frame where the first column is `element_id` (zero-based). This vignette shows how to write a `FUN` that produces outputs equivalent to the defaults of `ModelArray.lm`. -Important: `ModelArray.wrap` does not automatically adjust or otherwise modify p-values. If you want adjusted p-values (e.g., FDR), compute them inside your custom function `FUN` (as shown below for FDR). +Important: `ModelArray.wrap` does not automatically adjust or otherwise modify p-values. +Your function `FUN` only sees one element at a time, so it cannot perform p-value corrections. + ## Example: Using ModelArray.wrap to reproduce ModelArray.lm @@ -122,7 +124,10 @@ writeResults(h5_path, df.output = res_wrap_lm, analysis_name = "results_wrap_lm" ## Example: Computing simple group means -You can also use `ModelArray.wrap` to compute very simple statistics, such as group means of the response for each element. The example below computes the mean value of the response (the `scalar` column) within each level of a grouping variable (for example, `sex`), and also reports the difference between the first two levels if present. +You can also use `ModelArray.wrap` to compute very simple statistics, such as group means of the response for each element. +The example below computes the mean value of the response +(the `scalar` column) within each level of a grouping variable (for example, `sex`), +and also reports the difference between the first two levels if present. ```r #' One-element group means across levels of a grouping variable From 53bc95342d23da3ff82e2c59dbedf4ab8fd5df5c Mon Sep 17 00:00:00 2001 From: mattcieslak Date: Tue, 16 Sep 2025 21:15:23 -0400 Subject: [PATCH 5/9] Update manpages --- man/ModelArray.gam.Rd | 1 + man/ModelArray.lm.Rd | 1 + man/ModelArray.wrap.Rd | 64 +++++++++++++++++++++ man/analyseOneElement.gam.Rd | 1 + man/analyseOneElement.lm.Rd | 1 + man/analyseOneElement.wrap.Rd | 59 +++++++++++++++++++ man/exampleElementData-ModelArray-method.Rd | 34 +++++++++++ man/results-ModelArray-method.Rd | 1 + man/results.Rd | 1 + man/scalars-ModelArray-method.Rd | 1 + man/scalars.Rd | 1 + man/sources-ModelArray-method.Rd | 1 + man/sources.Rd | 1 + 13 files changed, 167 insertions(+) create mode 100644 man/ModelArray.wrap.Rd create mode 100644 man/analyseOneElement.wrap.Rd create mode 100644 man/exampleElementData-ModelArray-method.Rd diff --git a/man/ModelArray.gam.Rd b/man/ModelArray.gam.Rd index d3124ea..5aba0f2 100644 --- a/man/ModelArray.gam.Rd +++ b/man/ModelArray.gam.Rd @@ -22,6 +22,7 @@ ModelArray.gam( verbose = TRUE, pbar = TRUE, n_cores = 1, + on_error = "stop", ... ) } diff --git a/man/ModelArray.lm.Rd b/man/ModelArray.lm.Rd index 722a02a..1c24ac4 100644 --- a/man/ModelArray.lm.Rd +++ b/man/ModelArray.lm.Rd @@ -20,6 +20,7 @@ ModelArray.lm( verbose = TRUE, pbar = TRUE, n_cores = 1, + on_error = "stop", ... ) } diff --git a/man/ModelArray.wrap.Rd b/man/ModelArray.wrap.Rd new file mode 100644 index 0000000..046cfb6 --- /dev/null +++ b/man/ModelArray.wrap.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyse.R +\name{ModelArray.wrap} +\alias{ModelArray.wrap} +\title{Run a user-supplied function for element-wise data} +\usage{ +ModelArray.wrap( + FUN, + data, + phenotypes, + scalar, + element.subset = NULL, + num.subj.lthr.abs = 10, + num.subj.lthr.rel = 0.2, + verbose = TRUE, + pbar = TRUE, + n_cores = 1, + on_error = "stop", + ... +) +} +\arguments{ +\item{FUN}{A function that accepts at least `data` and returns a one-row data.frame/tibble, +a named list, or a named vector of results for that element.} + +\item{data}{ModelArray class} + +\item{phenotypes}{A data.frame with cohort covariates; must contain column `source_file`. +It must match `sources(data)[[scalar]]` order and contents (reordered if needed).} + +\item{scalar}{Character, the scalar name to analyze} + +\item{element.subset}{Optional integer vector of element indices (1-based); defaults to all} + +\item{num.subj.lthr.abs}{Integer lower threshold for subjects with finite values (default 10)} + +\item{num.subj.lthr.rel}{Relative lower threshold (0-1) (default 0.2)} + +\item{verbose}{TRUE/FALSE to print messages} + +\item{pbar}{TRUE/FALSE to show progress bar} + +\item{n_cores}{Positive integer number of CPU cores} + +\item{...}{Additional arguments forwarded to `FUN`} +} +\value{ +Tibble/data.frame with one row per element and first column `element_id` +} +\description{ +`ModelArray.wrap` runs a user-supplied function `FUN` for each requested element and +returns a tibble/data.frame of results combined across elements. +} +\details{ +This provides a generic framework reusing ModelArray's per-element looping, alignment, +subject-thresholding, and parallelization. The user function will be called as +`FUN(data = dat, ...)` where `dat` is `phenotypes` with the response column named `scalar` +appended for the current element. The return value from `FUN` for a single element must be +either a one-row data.frame/tibble, a named list, or a named atomic vector. The column +names from the first successful element determine the final schema. + +Note: `ModelArray.wrap` never performs any p-value corrections or modifications. +If you need adjusted p-values (e.g., FDR), implement them inside your provided `FUN`. +} diff --git a/man/analyseOneElement.gam.Rd b/man/analyseOneElement.gam.Rd index 63fd8df..a5ea013 100644 --- a/man/analyseOneElement.gam.Rd +++ b/man/analyseOneElement.gam.Rd @@ -17,6 +17,7 @@ analyseOneElement.gam( num.stat.output = NULL, flag_initiate = FALSE, flag_sse = FALSE, + on_error = "stop", ... ) } diff --git a/man/analyseOneElement.lm.Rd b/man/analyseOneElement.lm.Rd index 898a746..d673936 100644 --- a/man/analyseOneElement.lm.Rd +++ b/man/analyseOneElement.lm.Rd @@ -15,6 +15,7 @@ analyseOneElement.lm( num.subj.lthr, num.stat.output = NULL, flag_initiate = FALSE, + on_error = "stop", ... ) } diff --git a/man/analyseOneElement.wrap.Rd b/man/analyseOneElement.wrap.Rd new file mode 100644 index 0000000..762e4db --- /dev/null +++ b/man/analyseOneElement.wrap.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ModelArray_Constructor.R +\name{analyseOneElement.wrap} +\alias{analyseOneElement.wrap} +\title{Run a user-supplied function for one element} +\usage{ +analyseOneElement.wrap( + i_element, + user_fun, + modelarray, + phenotypes, + scalar, + num.subj.lthr, + num.stat.output = NULL, + flag_initiate = FALSE, + on_error = "stop", + ... +) +} +\arguments{ +\item{i_element}{An integer, the i_th element, starting from 1.} + +\item{user_fun}{A function that accepts at least an argument named `data` (the per-element +`phenotypes` with the response column appended) and returns a one-row data.frame/tibble, +named list, or named vector.} + +\item{modelarray}{ModelArray class} + +\item{phenotypes}{A data.frame of the cohort with columns of independent variables and covariates} + +\item{scalar}{A character. The name of the element-wise scalar to be analysed} + +\item{num.subj.lthr}{The minimal number of subjects with valid value in input h5 file} + +\item{num.stat.output}{The number of output stat metrics (including `element_id`). +Required when `flag_initiate = TRUE`.} + +\item{flag_initiate}{TRUE or FALSE, whether this is to initiate the new analysis to get column names} + +\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs while +executing the user function, choose whether to stop, skip returning all-NaN values for this +element, or drop into `browser()` (if interactive) then skip. Default: "stop".} + +\item{...}{Additional arguments forwarded to `FUN`} +} +\value{ +If `flag_initiate==TRUE`, returns a list with `column_names`. +If `flag_initiate==FALSE`, returns a numeric vector representing the one-row result for this element +with `element_id` as the first value. +} +\description{ +`analyseOneElement.wrap` runs a user-supplied function `FUN` on a single element's data. +It prepares the per-element data by attaching the element values as a new column named by `scalar` +to the provided `phenotypes` data.frame, then calls `FUN(data = dat, ...)`. +} +\details{ +The user-supplied `FUN` should return either a one-row data.frame/tibble, a named list, or a named vector. +The result will be coerced to a one-row tibble and combined into the final results matrix across elements. +} diff --git a/man/exampleElementData-ModelArray-method.Rd b/man/exampleElementData-ModelArray-method.Rd new file mode 100644 index 0000000..92e072b --- /dev/null +++ b/man/exampleElementData-ModelArray-method.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ModelArray_S4Methods.R +\name{exampleElementData,ModelArray-method} +\alias{exampleElementData,ModelArray-method} +\title{Example per-element data.frame for user functions} +\usage{ +\S4method{exampleElementData}{ModelArray}(x, scalar = "FD", i_element = 1L, phenotypes) +} +\arguments{ +\item{x}{An ModelArray object} + +\item{scalar}{A character. The name of the element-wise scalar to append} + +\item{i_element}{An integer, the i_th element (1-based)} + +\item{phenotypes}{A data.frame of the cohort with independent variables/covariates} +} +\value{ +A data.frame with the additional response column named by `scalar` +} +\description{ +Returns a copy of `phenotypes` with an extra column named by `scalar` populated +with the selected element's values from the `ModelArray`. This mirrors the +per-element data that `ModelArray.wrap` passes to user functions (`data = dat`). +} +\examples{ +\dontrun{ +h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray") +csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray") +ma <- ModelArray(h5_path, scalar_types = c("FD")) +phen <- read.csv(csv_path) +df <- exampleElementData(ma, scalar = "FD", i_element = 1, phenotypes = phen) +} +} diff --git a/man/results-ModelArray-method.Rd b/man/results-ModelArray-method.Rd index 6ddff06..05b064f 100644 --- a/man/results-ModelArray-method.Rd +++ b/man/results-ModelArray-method.Rd @@ -22,3 +22,4 @@ Statistical results in this ModelArray object \description{ Method for accessing the results slot of a ModelArray object. } +\keyword{internal} diff --git a/man/results.Rd b/man/results.Rd index 0fcb9d0..e846c41 100644 --- a/man/results.Rd +++ b/man/results.Rd @@ -17,3 +17,4 @@ The results slot of the object \description{ Generic function to access the results slot of an object. } +\keyword{internal} diff --git a/man/scalars-ModelArray-method.Rd b/man/scalars-ModelArray-method.Rd index b5e8fbf..27a49ab 100644 --- a/man/scalars-ModelArray-method.Rd +++ b/man/scalars-ModelArray-method.Rd @@ -22,3 +22,4 @@ A matrix of element-wise scalar data: elements (row) by source files (column). \description{ Method for accessing the scalars slot of a ModelArray object. } +\keyword{internal} diff --git a/man/scalars.Rd b/man/scalars.Rd index fdb7eb2..b93cbf4 100644 --- a/man/scalars.Rd +++ b/man/scalars.Rd @@ -17,3 +17,4 @@ The scalars slot of the object \description{ Generic function to access the scalars slot of an object. } +\keyword{internal} diff --git a/man/sources-ModelArray-method.Rd b/man/sources-ModelArray-method.Rd index 10b3ec5..5df4c29 100644 --- a/man/sources-ModelArray-method.Rd +++ b/man/sources-ModelArray-method.Rd @@ -20,3 +20,4 @@ A list of source filenames \description{ Method for accessing the sources slot of a ModelArray object. } +\keyword{internal} diff --git a/man/sources.Rd b/man/sources.Rd index dfed9d5..a0143d0 100644 --- a/man/sources.Rd +++ b/man/sources.Rd @@ -15,3 +15,4 @@ The sources slot of the object \description{ Generic function to access the sources slot of an object. } +\keyword{internal} From 26808e3f93bec3d734d46347d5357ebb94f1872d Mon Sep 17 00:00:00 2001 From: mattcieslak Date: Wed, 17 Sep 2025 18:24:28 -0400 Subject: [PATCH 6/9] docs(vignettes): replace wrap.Rmd with wrap_function.Rmd; add pkgdown redirect from articles/wrap.html to articles/wrap_function.html --- _pkgdown.yml | 4 + vignettes/wrap.Rmd | 187 ---------------------- vignettes/wrap_function.Rmd | 302 ++++++++++++++++++++++++++++++++++++ 3 files changed, 306 insertions(+), 187 deletions(-) delete mode 100644 vignettes/wrap.Rmd create mode 100644 vignettes/wrap_function.Rmd diff --git a/_pkgdown.yml b/_pkgdown.yml index fc19e8d..06e6d15 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -2,3 +2,7 @@ url: https://pennlinc.github.io/ModelArray/ template: bootstrap: 5 +redirects: + - from: articles/wrap.html + to: articles/wrap_function.html + diff --git a/vignettes/wrap.Rmd b/vignettes/wrap.Rmd deleted file mode 100644 index 8d1a61c..0000000 --- a/vignettes/wrap.Rmd +++ /dev/null @@ -1,187 +0,0 @@ ---- -title: "Writing custom element-wise analyses with ModelArray.wrap" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Writing custom element-wise analyses with ModelArray.wrap} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -## Overview - -`ModelArray.wrap` lets you run your own per-element analysis function over all elements in a `ModelArray`, reusing the same alignment, subject-threshold checks, and parallel looping machinery as `ModelArray.lm` and `ModelArray.gam`. - -- You provide a function `FUN` that operates on a single element by receiving a `dataframe` (the cohort `phenotypes` with an added response column named by `scalar`). -- `FUN` should return a one-row data.frame/tibble, a named list, or a named vector. The column names from the first successful element define the output schema. -- The result is a combined data.frame where the first column is `element_id` (zero-based). - -This vignette shows how to write a `FUN` that produces outputs equivalent to the defaults of `ModelArray.lm`. - -Important: `ModelArray.wrap` does not automatically adjust or otherwise modify p-values. -Your function `FUN` only sees one element at a time, so it cannot perform p-value corrections. - - -## Example: Using ModelArray.wrap to reproduce ModelArray.lm - -Below, we define `my_lm_fun()` that: - -- fits `stats::lm(formula, data=data)`; -- extracts term-level stats via `broom::tidy()` and selects the default fields used by `ModelArray.lm` (`estimate`, `statistic`, `p.value`); -- extracts model-level stats via `broom::glance()` and selects defaults (`adj.r.squared`, `p.value`); -- pivots term and model summaries into a single one-row tibble with column names like `Age.estimate`, `Age.statistic`, `Age.p.value`, `model.adj.r.squared`, `model.p.value`; -- adds FDR-corrected p-values for terms and the model to match `ModelArray.lm` defaults. - -```r -#' One-element LM equivalent used by ModelArray.wrap -my_lm_fun <- function(data, formula) { - # Fit lm - fit <- stats::lm(formula = formula, data = data) - - # Term-level stats - td <- broom::tidy(fit) - if (nrow(td) > 0) { - td$term[td$term == "(Intercept)"] <- "Intercept" - td <- td[, intersect(colnames(td), c("term", "estimate", "statistic", "p.value"))] - } - - # Model-level stats - gl <- broom::glance(fit) - gl$term <- "model" - gl <- gl[, intersect(colnames(gl), c("term", "adj.r.squared", "p.value"))] - - # Pivot to one row per element - library(tidyr) - library(dplyr) - - td_w <- if (nrow(td) > 0) { - pivot_wider(td, names_from = term, values_from = c(estimate, statistic, p.value), - names_glue = "{term}.{.value}") - } else td - - gl_w <- pivot_wider(gl, names_from = term, values_from = c(adj.r.squared, p.value), - names_glue = "{term}.{.value}") - - out <- bind_cols(td_w, gl_w) - - # Add FDR-corrected p-values to mirror ModelArray.lm defaults - term_names <- gsub("\\.estimate$|\\.statistic$|\\.p.value$", "", grep("\\.estimate$|\\.statistic$|\\.p.value$", names(out), value = TRUE)) - term_names <- unique(term_names[term_names != "model"]) # exclude model - - for (nm in term_names) { - pcol <- paste0(nm, ".p.value") - if (pcol %in% names(out)) { - out[[paste0(pcol, ".fdr")]] <- stats::p.adjust(out[[pcol]], method = "fdr") - } - } - if ("model.p.value" %in% names(out)) { - out[["model.p.value.fdr"]] <- stats::p.adjust(out[["model.p.value"]], method = "fdr") - } - - # Ensure it's a one-row tibble - tibble::as_tibble(out) -} -``` - -With the function defined, we can run it across elements with `ModelArray.wrap`. The example mirrors the linear model in the walkthrough vignette and runs on the first 100 elements for speed. - -```r -library(ModelArray) - -# Paths and data (adapt these to your setup) -h5_path <- "~/Desktop/myProject/demo_FDC_n100.h5" -csv_path <- "~/Desktop/myProject/cohort_FDC_n100.csv" -modelarray <- ModelArray(h5_path, scalar_types = c("FDC")) -phenotypes <- read.csv(csv_path) - -# Same formula used by ModelArray.lm -formula_lm <- FDC ~ Age + sex + dti64MeanRelRMS - -# Run user-wrapped LM on the first 100 elements -res_wrap_lm <- ModelArray.wrap( - FUN = my_lm_fun, - data = modelarray, - phenotypes = phenotypes, - scalar = "FDC", - element.subset = 1:100, - formula = formula_lm -) - -head(res_wrap_lm) -``` - -Optionally, save the results back into the HDF5 file: - -```r -writeResults(h5_path, df.output = res_wrap_lm, analysis_name = "results_wrap_lm") -``` - -## Notes and tips - -- The schema (column names) is inferred from the first element that has sufficient valid subjects. Ensure your `FUN` returns the same shape and names for all elements. -- `ModelArray.wrap` forwards `...` to your `FUN`. Use this to pass parameters like `formula` or other options. -- Subject thresholds: elements with insufficient finite values are automatically filled with `NaN` outputs, keeping `element_id` for alignment with other results. -- Parallelization: set `n_cores > 1` to enable multi-core execution; set `pbar = TRUE` to show a progress bar. - -## Example: Computing simple group means - -You can also use `ModelArray.wrap` to compute very simple statistics, such as group means of the response for each element. -The example below computes the mean value of the response -(the `scalar` column) within each level of a grouping variable (for example, `sex`), -and also reports the difference between the first two levels if present. - -```r -#' One-element group means across levels of a grouping variable -my_group_means_fun <- function(data, group_var, response_name) { - if (!group_var %in% names(data)) stop("group_var not found in data") - if (!response_name %in% names(data)) stop("response_name not found in data") - - df <- data - vals <- df[[response_name]] - # keep finite values only - keep <- is.finite(vals) - df <- df[keep, , drop = FALSE] - vals <- df[[response_name]] - grp <- df[[group_var]] - - levs <- sort(unique(grp)) - out <- list() - for (lv in levs) { - out[[paste0("mean_", response_name, "_", group_var, "_", lv)]] <- mean(vals[grp == lv], na.rm = TRUE) - } - if (length(levs) >= 2) { - out[[paste0("diff_mean_", response_name, "_", group_var, "_", levs[2], "_minus_", levs[1])]] <- - out[[paste0("mean_", response_name, "_", group_var, "_", levs[2])]] - - out[[paste0("mean_", response_name, "_", group_var, "_", levs[1])]] - } - - tibble::as_tibble(out) -} -``` - -Run across elements (here on the first 100 for speed), and save results: - -```r -library(ModelArray) - -# Paths and data (adapt these to your setup) -h5_path <- "~/Desktop/myProject/demo_FDC_n100.h5" -csv_path <- "~/Desktop/myProject/cohort_FDC_n100.csv" -modelarray <- ModelArray(h5_path, scalar_types = c("FDC")) -phenotypes <- read.csv(csv_path) - -res_group_means <- ModelArray.wrap( - FUN = my_group_means_fun, - data = modelarray, - phenotypes = phenotypes, - scalar = "FDC", - element.subset = 1:100, - group_var = "sex", - response_name = "FDC" -) - -head(res_group_means) - -writeResults(h5_path, df.output = res_group_means, analysis_name = "results_group_means") -``` - - diff --git a/vignettes/wrap_function.Rmd b/vignettes/wrap_function.Rmd new file mode 100644 index 0000000..fcab4cf --- /dev/null +++ b/vignettes/wrap_function.Rmd @@ -0,0 +1,302 @@ +--- +title: "Developing a function for ModelArray.wrap" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Developing a function for ModelArray.wrap} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +## Introduction + +`R` offers a ton of wonderful packages for statistical modeling - +more than we could ever hope to officially support with functions like `ModelArray.lm` and `ModelArray.gam` in `ModelArray`. +Fortunately, `ModelArray.wrap` provides a way to run your own function, +which can be arbitrarily complex, +while using the efficient looping and parallelization machinery of `ModelArray`. + +We'll skip over the making of the .h5 file and phenotypes CSV file, +and directly download the cortical thickness data from the Healthy Brain Network +computed as part of the Reproducible Brain Charts project. + +We'll develop a function that computes the mean cortical thickness within each site, +and then use `ModelArray.wrap` to run it across elements. + + +## Step 1. Prepare your data + +We first create a folder called "myProject" on the Desktop. In a terminal console: + +``` {.console} +$ cd ~/Desktop +$ mkdir myCustomFunction +$ cd myCustomFunction +$ pwd # print the full path of this folder +``` + +``` {.console} +$ wget -cO - https://osf.io/tce9d/download > download.tar.gz +$ tar -xzf download.tar.gz +$ rm download.tar.gz +``` + +## Step 2. Use ModelArray to perform statistical analysis + +The next step is to use this HDF5 file and the CSV file we prepared to perform statistical analysis in R. +If you installed RStudio (which we recommend), then you can simply launch RStudio. +All the commands in this Step 2 section will be run in R. + +> Hint for voxel-wise data for Step 2: You can follow the same steps here. Note that each "element" is a *voxel* now, instead of a *fixel*. Make sure you replace the scalar name, covariates, file paths etc with yours. In addition, as voxel-wise data may have subject-specific masks, you can also tailor the lower threshold of number of subjects when applying `ModelArray.lm()` and `ModelArray.gam()`. See `vignette("voxel-wise_data")` page for more. + +### Step 2.1. Load ModelArray package in R + +After you've installed `ModelArray`, here's how we load the library: + +```{r load ModelArray} +library(ModelArray) +``` + +### Step 2.2. Create a ModelArray-class object + +To create a ModelArray-class object that represents the HDF5 file of fixel-wise data, we need the HDF5 filename and the scalar's name: + +```{r create ModelArray object, eval=FALSE} +h5_path <- "~/Desktop/myCustomFunction/ConCifti/study-HBN_compression-gzip_chunkmb-4_thickness.h5" +# create a ModelArray-class object: +modelarray <- ModelArray(h5_path, scalar_types = c("thickness")) +# let's check what's in it: +modelarray +``` + +You'll see: +```{.console} +ModelArray located at ~/Desktop/myProject/demo_FDC_n100.h5 + + Source files: 2336 + Scalars: thickness + Analyses: +``` + +This shows that there are 2336 source thickness files in this `modelarray` you created. + +You may take a look at what the scalar matrix looks like by using `scalars()`: +```{r access slots, eval = FALSE} +# scalar FDC data: +scalars(modelarray)[["thickness"]] +``` +You'll see: +```{.console} +<327684 x 2336> DelayedMatrix object of type "double": + ciftis/sub-NDARAA075AMK_thickness.fsLR_den-164k.dscalar.nii ... ciftis/sub-NDARZZ810LVF_thickness.fsLR_den-164k.dscalar.nii + [1,] 2.323956 . 3.401824 + [2,] 2.927209 . 2.428174 + [3,] 3.211094 . 2.479505 + [4,] 2.463409 . 3.760944 + [5,] 2.460162 . 2.912316 + ... . . . +[327680,] 2.832996 . 3.066516 +[327681,] 2.669299 . 2.912496 +[327682,] 2.555752 . 2.510855 +[327683,] 2.336970 . 2.472471 +[327684,] 2.256365 . 2.293425 +``` +Rows are greyordinates `(n = 327684)`, and column names are the source filenames `(n = 2336)`. +Each value is a specific greyordinate's thickness from a dscalar.nii file. + +### Step 2.3. Load cohort phenotypes CSV file + +We then load the accompanying CSV file: + +```{r load csv file, eval=FALSE} +csv_path <- "~/Desktop/myCustomFunction/ConCifti/study-HBN_desc-participants_thickness.csv" +# load the CSV file: +phenotypes <- read.csv(csv_path) +nrow(phenotypes) +names(phenotypes) +``` + +This CSV file comes from RBC but is reduced to include only the subjects with thickness data. +It also includes rich phenotype information. + +### Step 2.4. Perform statistical analysis +With both `modelarray` and data frame `phenotypes` set up, we can now perform the statistical analysis. + +We need to write a function that we can use within `ModelArray.wrap` to compute what we want. +It can be tricky to figure out exactly what you want to compute and how to make sure your function returns the correct output. +We can use a convenience function from `ModelArray` to see what a dataframe looks like for a single element. + +```{r check_one_element_df} +example_df <- exampleElementData(modelarray, scalar = "thickness", i_element = 12345, phenotypes = phenotypes) +names(example_df) +``` + +This is a rich dataframe! +It contains the cortical thickness for all subjects in the study at vertex 12345. +It also contains the subject ID, sex, age, and other covariates. + +We can explore this single element like we would any dataframe: + +```{r explore_one_element_df} +library(ggplot2) +ggplot(example_df, aes(x = age, y = thickness, color=study_site)) + + geom_point() + + stat_smooth(method = "lm") + + theme_minimal() +``` + +This looks cool. +There is apparently a site effect, +so let's write a function that computes a mean cortical thickness within each site. +We'll also compute an anova on site and get the statistics/p-values. +For now we'll get the values we want into a dataframe, then we'll write the function. + +```{r means_into_df_example} +site_means <- example_df %>% + group_by(study_site) %>% + summarize(mean_thickness = mean(thickness, na.rm = TRUE)) %>% + # write them into "thickness_{study_site}" columns + pivot_wider(names_from = study_site, values_from = mean_thickness, names_glue = "thickness_{study_site}") +site_means +``` + +This looks good. +Now let's do the anova and get the statistics/p-values. + +```{r anova_into_df_example} +site_anova <- example_df %>% + { lm(thickness ~ study_site, data = .) } %>% + anova() %>% + broom::tidy() %>% + dplyr::mutate( + partial_eta_sq = dplyr::if_else( + term != "Residuals", + sumsq / (sumsq + sumsq[term == "Residuals"]), + NA_real_ + ) + ) %>% + dplyr::filter(term != "Residuals") %>% + dplyr::select(term, df, sumsq, meansq, statistic, p.value, partial_eta_sq) %>% + tidyr::pivot_wider( + names_from = term, + values_from = c(df, sumsq, meansq, statistic, p.value, partial_eta_sq), + names_glue = "{term}.{.value}" + ) %>% + tibble::as_tibble() + +site_anova +``` + +Combining the two we get the full function we want. +Absolutely critical here is that your function needs a single argument named `data`: + +```{r full_function_example} +my_mean_thickness_fun <- function(data) { + site_means <- data %>% + group_by(study_site) %>% + summarize(mean_thickness = mean(thickness, na.rm = TRUE)) %>% + pivot_wider(names_from = study_site, values_from = mean_thickness, names_glue = "thickness_{study_site}") + site_anova <- data %>% + { lm(thickness ~ study_site, data = .) } %>% + anova() %>% + broom::tidy() %>% + dplyr::mutate( + partial_eta_sq = dplyr::if_else( + term != "Residuals", + sumsq / (sumsq + sumsq[term == "Residuals"]), + NA_real_ + ) + ) %>% + dplyr::filter(term != "Residuals") %>% + dplyr::select(term, df, sumsq, meansq, statistic, p.value, partial_eta_sq) %>% + tidyr::pivot_wider( + names_from = term, + values_from = c(df, sumsq, meansq, statistic, p.value, partial_eta_sq), + names_glue = "{term}.{.value}" + ) %>% + tibble::as_tibble() + bind_cols(site_means, site_anova) +} + +my_mean_thickness_fun(example_df) +``` + +This looks great! +We can test that the function will work in the `ModelArray` looping machinery by running it on a single element. + +```{r test_one_element} +analyseOneElement.wrap( + 12345, + my_mean_thickness_fun, + modelarray, + phenotypes, + "thickness", + num.subj.lthr=10, + flag_initiate = TRUE, + on_error = "debug" +) +``` +This will print the column names that will be retrieved from the function when it's run in the `ModelArray` looping machinery. +It looks good, so we can run it on all the greyordinates: + +```{r run_wrapped_function, eval=FALSE} +res_wrap <- ModelArray.wrap( + FUN = my_mean_thickness_fun, + data = modelarray, + phenotypes = phenotypes, + scalar = "thickness", + n_cores = 8 +) +``` + +On my laptop this took about 15 minutes. + +```{r head_results, eval=FALSE} +head(res_wrap) +``` + +```{.console} + element_id thickness_HBNsiteCBIC thickness_HBNsiteCUNY thickness_HBNsiteRU thickness_HBNsiteSI study_site.df study_site.sumsq study_site.meansq study_site.statistic study_site.p.value study_site.partial_eta_sq +1 0 3.108483 3.009349 3.063119 2.692335 3 45.52142 15.173806 59.35110 5.688104e-37 0.07093606 +2 1 2.618739 2.673533 2.494077 2.324791 3 24.51961 8.173205 28.88297 2.474739e-18 0.03582533 +3 2 3.111822 3.056225 2.983709 2.812225 3 23.59899 7.866330 19.95448 9.059350e-13 0.02502795 +4 3 3.669784 3.621715 3.054638 3.329743 3 186.55051 62.183503 83.32945 3.261715e-51 0.09682010 +5 4 2.790135 2.729325 2.494522 2.444265 3 53.34644 17.782147 55.42033 1.336311e-34 0.06655069 +6 5 2.996483 3.043458 2.785565 2.650955 3 40.25648 13.418828 41.60603 3.458560e-26 0.05080478 +``` + + +We now save the results data frame into the original h5 file: + +```{r write results, eval=FALSE} +writeResults(h5_path, df.output = res_wrap, analysis_name = "anova_and_means") +``` + +Notice that the the analysis name specified in argument `analysis_name` will be used in `ConCifti` in the next step when converting results back to dscalar.nii file format. +It'll also be used as the prefix of the mif files to be saved. + + +## Step 3. Convert back to dscalar.nii file format + + +### Step 3.1. Convert the statistical results into mif file format using ConFixel +We now use `ConFixel` to convert the results into a list of mif files: + +```{.console} +$ ciftistats_write \ + --cohort-file ${HOME}/Desktop/myCustomFunction/ConCifti/study-HBN_desc-participants_thickness.csv \ + --relative-root ${HOME}/Desktop/myCustomFunction \ + --analysis-name anova_and_means \ + --input-hdf5 ${HOME}/Desktop/myCustomFunction/ConCifti/study-HBN_compression-gzip_chunkmb-4_thickness.h5 \ + --output-dir ${HOME}/Desktop/myCustomFunction/anova_and_means \ + --example-cifti ${HOME}/Desktop/myCustomFunction/ConCifti/example_fsLR_den-164k.dscalar.nii + +# remember to use your specific path in --relative-root; +# we recommend it's a full path too! +``` + +After it's done, in the main folder `myCustomFunction`, +there will be a new folder called `anova_and_means` and converted result `dscalar.nii`files are in this new folder: + + + + From 63e398c9f45cc54a8b0f5ae1a728106295dcf4f8 Mon Sep 17 00:00:00 2001 From: mattcieslak Date: Wed, 17 Sep 2025 19:11:41 -0400 Subject: [PATCH 7/9] update docs --- README.md | 6 +++++- _pkgdown.yml | 3 +-- vignettes/basic_r_intro.Rmd | 2 +- vignettes/wrap_function.Rmd | 12 ++++++------ 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 1497492..d979858 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ + # ModelArray @@ -44,7 +45,10 @@ paper](https://doi.org/10.1016/j.neuroimage.2023.120037) if you use
-![Overview](man/figures/overview_structure.png) +
+Overview + +
diff --git a/_pkgdown.yml b/_pkgdown.yml index 06e6d15..088366a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -3,6 +3,5 @@ template: bootstrap: 5 redirects: - - from: articles/wrap.html - to: articles/wrap_function.html + - ["articles/wrap.html", "articles/wrap_function.html"] diff --git a/vignettes/basic_r_intro.Rmd b/vignettes/basic_r_intro.Rmd index 707ead4..a79c002 100644 --- a/vignettes/basic_r_intro.Rmd +++ b/vignettes/basic_r_intro.Rmd @@ -7,7 +7,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r setup, include = FALSE} +```{r setup, include = FALSE, eval=FALSE} library(ModelArray) knitr::opts_chunk$set( collapse = TRUE, diff --git a/vignettes/wrap_function.Rmd b/vignettes/wrap_function.Rmd index fcab4cf..5791353 100644 --- a/vignettes/wrap_function.Rmd +++ b/vignettes/wrap_function.Rmd @@ -125,7 +125,7 @@ We need to write a function that we can use within `ModelArray.wrap` to compute It can be tricky to figure out exactly what you want to compute and how to make sure your function returns the correct output. We can use a convenience function from `ModelArray` to see what a dataframe looks like for a single element. -```{r check_one_element_df} +```{r check_one_element_df, eval=FALSE} example_df <- exampleElementData(modelarray, scalar = "thickness", i_element = 12345, phenotypes = phenotypes) names(example_df) ``` @@ -136,7 +136,7 @@ It also contains the subject ID, sex, age, and other covariates. We can explore this single element like we would any dataframe: -```{r explore_one_element_df} +```{r explore_one_element_df, eval=FALSE} library(ggplot2) ggplot(example_df, aes(x = age, y = thickness, color=study_site)) + geom_point() + @@ -150,7 +150,7 @@ so let's write a function that computes a mean cortical thickness within each si We'll also compute an anova on site and get the statistics/p-values. For now we'll get the values we want into a dataframe, then we'll write the function. -```{r means_into_df_example} +```{r means_into_df_example, eval=FALSE} site_means <- example_df %>% group_by(study_site) %>% summarize(mean_thickness = mean(thickness, na.rm = TRUE)) %>% @@ -162,7 +162,7 @@ site_means This looks good. Now let's do the anova and get the statistics/p-values. -```{r anova_into_df_example} +```{r anova_into_df_example, eval=FALSE} site_anova <- example_df %>% { lm(thickness ~ study_site, data = .) } %>% anova() %>% @@ -189,7 +189,7 @@ site_anova Combining the two we get the full function we want. Absolutely critical here is that your function needs a single argument named `data`: -```{r full_function_example} +```{r full_function_example, eval=FALSE} my_mean_thickness_fun <- function(data) { site_means <- data %>% group_by(study_site) %>% @@ -223,7 +223,7 @@ my_mean_thickness_fun(example_df) This looks great! We can test that the function will work in the `ModelArray` looping machinery by running it on a single element. -```{r test_one_element} +```{r test_one_element, eval=FALSE} analyseOneElement.wrap( 12345, my_mean_thickness_fun, From 991aa711aa51490abfdc8d691f74b94f21e51d1f Mon Sep 17 00:00:00 2001 From: mattcieslak Date: Wed, 17 Sep 2025 19:21:42 -0400 Subject: [PATCH 8/9] update osf.io link --- vignettes/wrap_function.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/wrap_function.Rmd b/vignettes/wrap_function.Rmd index 5791353..c9dd15f 100644 --- a/vignettes/wrap_function.Rmd +++ b/vignettes/wrap_function.Rmd @@ -35,7 +35,7 @@ $ pwd # print the full path of this folder ``` ``` {.console} -$ wget -cO - https://osf.io/tce9d/download > download.tar.gz +$ wget -cO - https://osf.io/uxc65/download > download.tar.gz $ tar -xzf download.tar.gz $ rm download.tar.gz ``` From e17fe6e01cdf2da0bac18227c76541c186f13bda Mon Sep 17 00:00:00 2001 From: mattcieslak Date: Wed, 17 Sep 2025 19:25:26 -0400 Subject: [PATCH 9/9] styler --- R/ModelArray_Constructor.R | 148 ++++++++++++++++++++---------------- vignettes/wrap_function.Rmd | 22 +++--- 2 files changed, 96 insertions(+), 74 deletions(-) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index e817eae..a340b3b 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -240,7 +240,8 @@ ModelArray <- function(filepath, # exists # /results//has_names: names_results_matrix <- rhdf5::h5readAttributes(filepath, - name = sprintf("results/%s/results_matrix", analysis_name))$colnames # after updating writeResults() + name = sprintf("results/%s/results_matrix", analysis_name) + )$colnames # after updating writeResults() # names_results_matrix <- ModelArraySeed(filepath, name = sprintf( # "results/%s/has_names", analysis_name), type = NA) %>% @@ -420,27 +421,32 @@ analyseOneElement.lm <- function(i_element, # onemodel <- stats::lm(formula, data = dat, ...) # onemodel <- stats::lm(formula, data = dat, weights = myWeights,...) - onemodel <- tryCatch({ - do.call(stats::lm, arguments_lm) - }, error = function(e) { - msg <- paste0("analyseOneElement.lm error at element ", - i_element, - ": ", - conditionMessage(e)) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.lm_error_initiate = TRUE), class = "lm_error")) - } else { - return(structure(list(.lm_error_runtime = TRUE), class = "lm_error")) + onemodel <- tryCatch( + { + do.call(stats::lm, arguments_lm) + }, + error = function(e) { + msg <- paste0( + "analyseOneElement.lm error at element ", + i_element, + ": ", + conditionMessage(e) + ) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.lm_error_initiate = TRUE), class = "lm_error")) + } else { + return(structure(list(.lm_error_runtime = TRUE), class = "lm_error")) + } } + stop(e) } - stop(e) - }) + ) if (inherits(onemodel, "lm_error")) { if (flag_initiate == TRUE) { @@ -660,27 +666,32 @@ analyseOneElement.gam <- function(i_element, arguments$data <- dat # explicitly passing arguments into command, to avoid error of argument "weights" - onemodel <- tryCatch({ - do.call(mgcv::gam, arguments) - }, error = function(e) { - msg <- paste0("analyseOneElement.gam error at element ", - i_element, - ": ", - conditionMessage(e)) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.gam_error_initiate = TRUE), class = "gam_error")) - } else { - return(structure(list(.gam_error_runtime = TRUE), class = "gam_error")) + onemodel <- tryCatch( + { + do.call(mgcv::gam, arguments) + }, + error = function(e) { + msg <- paste0( + "analyseOneElement.gam error at element ", + i_element, + ": ", + conditionMessage(e) + ) + if (on_error == "debug" && interactive()) { + message(msg) + browser() } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.gam_error_initiate = TRUE), class = "gam_error")) + } else { + return(structure(list(.gam_error_runtime = TRUE), class = "gam_error")) + } + } + stop(e) } - stop(e) - }) + ) if (inherits(onemodel, "gam_error")) { if (flag_initiate == TRUE) { @@ -753,11 +764,11 @@ analyseOneElement.gam <- function(i_element, # remove those columns: if (length(var.smoothTerms.remove) != 0) { # if length=0, it's list(), nothing to remove - onemodel.tidy.smoothTerms <- dplyr::select(onemodel.tidy.smoothTerms,-all_of(var.smoothTerms.remove)) + onemodel.tidy.smoothTerms <- dplyr::select(onemodel.tidy.smoothTerms, -all_of(var.smoothTerms.remove)) } if (length(var.parametricTerms.remove) != 0) { # if length=0, it's list(), nothing to remove - onemodel.tidy.parametricTerms <- dplyr::select(onemodel.tidy.parametricTerms,-all_of(var.parametricTerms.remove)) + onemodel.tidy.parametricTerms <- dplyr::select(onemodel.tidy.parametricTerms, -all_of(var.parametricTerms.remove)) } if (length(var.model.remove) != 0) { onemodel.glance <- dplyr::select(onemodel.glance, -all_of(var.model.remove)) @@ -879,8 +890,10 @@ analyseOneElement.gam <- function(i_element, # combine the tables: check if any of them is empty (tibble()) - onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.tidy.smoothTerms.onerow, - onemodel.tidy.parametricTerms.onerow) + onemodel.onerow <- bind_cols_check_emptyTibble( + onemodel.tidy.smoothTerms.onerow, + onemodel.tidy.parametricTerms.onerow + ) onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.onerow, onemodel.glance.onerow) @@ -1006,27 +1019,32 @@ analyseOneElement.wrap <- function(i_element, arguments$data <- dat # Execute user function with error handling - result <- tryCatch({ - do.call(user_fun, arguments) - }, error = function(e) { - msg <- paste0("analyseOneElement.wrap error at element ", - i_element, - ": ", - conditionMessage(e)) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.wrap_error_initiate = TRUE), class = "wrap_error")) - } else { - return(structure(list(.wrap_error_runtime = TRUE), class = "wrap_error")) + result <- tryCatch( + { + do.call(user_fun, arguments) + }, + error = function(e) { + msg <- paste0( + "analyseOneElement.wrap error at element ", + i_element, + ": ", + conditionMessage(e) + ) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate == TRUE) { + return(structure(list(.wrap_error_initiate = TRUE), class = "wrap_error")) + } else { + return(structure(list(.wrap_error_runtime = TRUE), class = "wrap_error")) + } } + stop(e) } - stop(e) - }) + ) # On error in user function, return NaNs according to flag if (inherits(result, "wrap_error")) { @@ -1127,7 +1145,7 @@ writeResults <- function(fn.output, # check if group "results\" exists: if (results.grp$exists(analysis_name) == TRUE && - overwrite == FALSE) { + overwrite == FALSE) { warning(paste0(analysis_name, " exists but not to overwrite!")) # TODO: add checker for exisiting analysis_name, esp the matrix size results.analysis.grp <- results.grp$open(analysis_name) @@ -1135,7 +1153,7 @@ writeResults <- function(fn.output, } else { # not exist; or exist && overwrite: to create if (results.grp$exists(analysis_name) == TRUE && - overwrite == TRUE) { + overwrite == TRUE) { # delete existing one first results.grp$link_delete(analysis_name) # NOTE: the file size will not shrink after your deletion.. @@ -1153,7 +1171,7 @@ writeResults <- function(fn.output, col_class <- as.character(sapply(df.output, class)[i_col]) # class of this column if ((col_class != "numeric") && - (col_class != "integer")) { + (col_class != "integer")) { # the column class is not numeric or integer message( paste0( diff --git a/vignettes/wrap_function.Rmd b/vignettes/wrap_function.Rmd index c9dd15f..ced4d01 100644 --- a/vignettes/wrap_function.Rmd +++ b/vignettes/wrap_function.Rmd @@ -138,7 +138,7 @@ We can explore this single element like we would any dataframe: ```{r explore_one_element_df, eval=FALSE} library(ggplot2) -ggplot(example_df, aes(x = age, y = thickness, color=study_site)) + +ggplot(example_df, aes(x = age, y = thickness, color = study_site)) + geom_point() + stat_smooth(method = "lm") + theme_minimal() @@ -164,7 +164,9 @@ Now let's do the anova and get the statistics/p-values. ```{r anova_into_df_example, eval=FALSE} site_anova <- example_df %>% - { lm(thickness ~ study_site, data = .) } %>% + { + lm(thickness ~ study_site, data = .) + } %>% anova() %>% broom::tidy() %>% dplyr::mutate( @@ -196,7 +198,9 @@ my_mean_thickness_fun <- function(data) { summarize(mean_thickness = mean(thickness, na.rm = TRUE)) %>% pivot_wider(names_from = study_site, values_from = mean_thickness, names_glue = "thickness_{study_site}") site_anova <- data %>% - { lm(thickness ~ study_site, data = .) } %>% + { + lm(thickness ~ study_site, data = .) + } %>% anova() %>% broom::tidy() %>% dplyr::mutate( @@ -225,12 +229,12 @@ We can test that the function will work in the `ModelArray` looping machinery by ```{r test_one_element, eval=FALSE} analyseOneElement.wrap( - 12345, - my_mean_thickness_fun, - modelarray, - phenotypes, - "thickness", - num.subj.lthr=10, + 12345, + my_mean_thickness_fun, + modelarray, + phenotypes, + "thickness", + num.subj.lthr = 10, flag_initiate = TRUE, on_error = "debug" )