diff --git a/.Rbuildignore b/.Rbuildignore index 5b5cb9d..f71e5af 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,3 +2,8 @@ ^\.Rproj\.user$ ^etc$ ^docs$ +^\_pkgdown\.yml$ +^index\.md$ +^\.travis\.yml$ +^LICENSE$ +^MAINTENANCE\.md$ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..db39904 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,25 @@ +# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r + +language: r +r: + - oldrel + - release + - devel +r_packages: + - ncdf4 + - sf +sudo: false +cache: packages +addons: + apt: + update: true + packages: + - librdf0-dev + - libnetcdf-dev + - netcdf-bin + - libudunits2-dev # for udunits2 + - libgeos-dev # for sf + - libproj-dev # for sf + - libgdal-dev # for sf + - libjq-dev + - libv8-dev diff --git a/DESCRIPTION b/DESCRIPTION index 5027528..5057935 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,36 +1,55 @@ Package: arcticdatautils -Title: Arctic Data Utilities -Version: 0.5.13 +Title: Utilities for the Arctic Data Center +Version: 0.7.0 Authors@R: c( person("Bryce", "Mecum", email = "mecum@nceas.ucsb.edu", role = c("aut", "cre")), - person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb") + person("Matt", "Jones", email = "jones@nceas.ucsb.edu", role = "ctb"), + person("Jesse", "Goldstein", email = "jgoldstein@nceas.ucsb.edu", role = "ctb", comment = "Maintainer"), + person("Jeanette", "Clark", email = "jclark@nceas.ucsb.edu", role = "ctb", comment = "Maintainer"), + person("Dominic", "Mullen", email = "dmullen17@gmail.com", role = "ctb"), + person("Emily", "O'Dean", email = "eodean10@gmail.com", role = "ctb"), + person("Robyn", "Thiessen-Bock", email = "robyn.thiessenbock@gmail.com", role = "ctb"), + person("Derek", "Strong", email = "dstrong@nceas.ucsb.edu", role = "ctb"), + person("Rachel", "Sun", email = "rachelsun@ucsb.edu", role = "ctb") ) -Description: This package provides a set of utility methods for uploading - and editing data on the Arctic Data 
Catalog. +Description: A set of utilities for working with the Arctic Data Center + (https://arcticdata.io). +License: Apache License (== 2.0) +URL: https://nceas.github.io/arcticdatautils/ +BugReports: https://github.com/NCEAS/arcticdatautils/issues +Encoding: UTF-8 +LazyData: true Depends: R (>= 3.2.3) Imports: - digest, - dplyr, dataone, datapack, - EML, + dplyr, + digest, + EML (>= 2.0), httr, + jsonlite, + magrittr, methods, - ncdf4, stringr, stringi, tools, uuid, - yaml, xml2, XML -License: MIT + file LICENSE -LazyData: true Suggests: - testthat, + emld, + humaniformat, knitr, + lubridate, + ncdf4, + RCurl, + purrr, rmarkdown, - xslt -RoxygenNote: 6.0.1 + sf, + testthat, + xslt, + yaml +RoxygenNote: 6.1.1 +Roxygen: list(markdown = TRUE) VignetteBuilder: knitr diff --git a/MAINTENANCE.md b/MAINTENANCE.md new file mode 100644 index 0000000..1dcdf91 --- /dev/null +++ b/MAINTENANCE.md @@ -0,0 +1,85 @@ +# Maintenance + +This document serves as a guide for new maintainers of and contributors to the arcticdatautils package. + +*Note: This is a work-in-progress, so expect it to change and improve over time.* + + +## Releases + +### Why release + +So users can use new features. Ideally, no one is installing from source, i.e., `remotes::install_github("nceas/arcticdatautils")`. + +### When to release + +Whenever, really. +Since this package isn't on CRAN, you can release it as much as you like or need to. +You might want to release either when: + +- You accrue enough changes to write an interesting release announcement ("Hey, look at this cool new release that fixes annoying bug X!") +- You accrue at least one change and users of the package need the fix immediately + +### How to release + +There are a few steps in releasing a new version of the package: + +1. Increment the `Version` tag in the `DESCRIPTION` file to the appropriate next version. 
+ + What this is set to specifies what the user sees from R when they run `sessionInfo()` or `devtools::session_info()` and tell you what version they have installed. + + This package tries to use [Semantic Versioning](https://semver.org/) (semver) which can be summarized in three bullets: + + > Given a version number MAJOR.MINOR.PATCH, increment the: + > + > - MAJOR version when you make incompatible API changes, + > - MINOR version when you add functionality in a backwards-compatible manner, and + > - PATCH version when you make backwards-compatible bug fixes. + + Note: A common mistake people make is thinking that the next version after 0.9 is 1.0, but it could be 0.10, then 0.11, and so on. + + `git` and GitHub help us a lot with determining _what_ has changed so we can determine what the next release version number should be. We can compare a previous release to `master` to get a list of all commits that were made between that release and now: + + > https://github.com/NCEAS/arcticdatautils/compare/v0.6.3...master + + +- Make and push a commit with just that diff, + + ```sh + git add DESCRIPTION + git commit -m "vx.y.z" + git push + ``` + + [Example here](https://github.com/NCEAS/arcticdatautils/commit/87f91179f4820ecdb283672e2179984d4f6cd334). + +2. Go to [the releases tab](https://github.com/NCEAS/arcticdatautils/releases) and click "Draft a new release" + + - Tag version and Release title should match v{MAJOR}.{MINOR}.{PATCH}, e.g., v6.4.5 + - The release description should include: + - A brief, 1-2 sentence description of what's changed since the last release + - Sections for ADDED/FIXED/REMOVED (omit section if not applicable), each with a bulleted list of changes in human-readable prose + + Example: https://github.com/NCEAS/arcticdatautils/releases/tag/v0.6.2 + + - Make liberal use of GitHub's Compare feature: [Example](https://github.com/NCEAS/arcticdatautils/compare/v0.6.3...master) comparing `v0.6.3` to `master`. 
+ +You're done, now go tell people to upgrade! + +```r +remotes::install_github("nceas/arcticdatautils@*release") +``` + +Note: `@*release` specifies that the latest release should be installed. + + +## Pull Requests + +- Follow the [tidyverse style conventions](http://style.tidyverse.org/), with the following specific style preferences: + - use underscore for all variable names unless referring to an EML object (e.g., otherEntity, publicationDate, etc.) + - include argument checks in the form of `stopifnot()` statements for all functions +- Before submitting a pull request, please update documentation, check package, and run tests: + - use `devtools::check()` + - fix any ERRORs and test failures to ensure the Travis CI build passes +- Commit messages should follow these [guidelines](https://chris.beams.io/posts/git-commit/) +- If fixing an issue, pull requests should reference that issue (e.g., "This update closes #93.") diff --git a/NAMESPACE b/NAMESPACE index 2703443..23485da 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,108 +1,88 @@ # Generated by roxygen2: do not edit by hand -export(add_access_rules) -export(add_additional_identifiers) -export(add_admin_group_access) -export(add_methods_step) -export(add_string_to_title) -export(change_eml_name) -export(clear_methods) -export(clear_replication_policy) export(convert_iso_to_eml) +export(create_dummy_attributes_dataframe) +export(create_dummy_enumeratedDomain_dataframe) export(create_dummy_metadata) export(create_dummy_object) export(create_dummy_package) +export(create_dummy_package_full) export(create_dummy_parent_package) -export(create_from_folder) -export(create_object) export(create_resource_map) -export(create_sysmeta) -export(determine_child_pids) -export(eml_add_entities) -export(eml_address) export(eml_associated_party) export(eml_contact) export(eml_creator) -export(eml_individual_name) +export(eml_get_simple) export(eml_metadata_provider) +export(eml_nsf_to_project) 
+export(eml_otherEntity_to_dataTable) export(eml_party) +export(eml_personnel) export(eml_project) +export(eml_set_reference) +export(eml_set_shared_attributes) export(eml_validate_attributes) export(env_get) -export(env_load) -export(filter_obsolete_pids) -export(filter_packaging_statements) export(find_newest_object) -export(find_newest_resource_map) export(format_eml) export(format_iso) export(generate_resource_map) -export(generate_resource_map_pid) +export(get_all_sysmeta) export(get_all_versions) -export(get_current_version) -export(get_doc_id) -export(get_identifier) -export(get_latest_release) export(get_mn_base_url) export(get_ncdf4_attributes) -export(get_netcdf_format_id) -export(get_or_create_pid) +export(get_orcid_email) +export(get_orcid_name) export(get_package) export(get_token) -export(get_token_subject) export(guess_format_id) -export(insert_file) -export(insert_package) -export(inv_add_extra_columns) -export(inv_add_parent_package_column) -export(inv_init) -export(inv_load_checksums) -export(inv_load_dois) -export(inv_load_files) -export(inv_load_identifiers) -export(inv_load_sizes) -export(inv_update) export(is_authorized) -export(is_format_id) export(is_obsolete) -export(is_resource_map) +export(is_public_read) export(is_token_expired) export(is_token_set) -export(log_message) +export(list_submissions) export(mdq_run) export(new_uuid) export(object_exists) export(parse_resource_map) -export(path_join) -export(pid_to_eml_other_entity) +export(pid_to_eml_entity) export(pid_to_eml_physical) export(publish_object) export(publish_update) +export(read_zip_shapefile) +export(recover_failed_submission) +export(recover_prov) +export(remove_access) export(remove_public_read) -export(replace_package_id) -export(replace_subject) -export(set_abstract) +export(reorder_pids) export(set_access) export(set_file_name) -export(set_other_entities) export(set_public_read) +export(set_public_read_all_versions) export(set_rights_and_access) export(set_rights_holder) 
export(show_indexing_status) -export(show_random_dataset) -export(substitute_eml_party) -export(sysmeta_to_eml_other_entity) export(sysmeta_to_eml_physical) -export(sysmeta_to_other_entity) -export(theme_packages) export(update_object) -export(update_package) export(update_resource_map) -export(validate_environment) -export(validate_inventory) export(view_profile) -export(warn_current_version) +export(which_in_eml) import(EML) import(XML) import(dataone) import(datapack) +importFrom(httr,content) +importFrom(magrittr,'%>%') +importFrom(methods,"slot<-") +importFrom(methods,as) +importFrom(methods,is) +importFrom(methods,new) +importFrom(methods,slot) +importFrom(stats,na.omit) +importFrom(utils,URLencode) +importFrom(utils,head) +importFrom(utils,read.csv) +importFrom(utils,read.delim) +importFrom(utils,setTxtProgressBar) +importFrom(utils,txtProgressBar) diff --git a/R/access.R b/R/access.R index b427ae1..ef98c0f 100644 --- a/R/access.R +++ b/R/access.R @@ -1,27 +1,99 @@ -#' access.R +# High-level utility functions for getting and setting access rules for DataONE objects + + +#' Add access rules to the sysmeta object +#' +#' This is a function to add a standard set of access rules to +#' every object so that the access rules don't differ across objects. +#' +#' @param sysmeta (SystemMetadata) The SystemMetadata to add rules to. +#' +#' @return The modified SystemMetadata object. +#' +#' @noRd +add_access_rules <- function(sysmeta) { + if (!inherits(sysmeta, "SystemMetadata")) { + stop(paste0("An object of class ", class(sysmeta), " was passed in. 
Returning unmodified object.\n")) + } + + # Add myself explicitly as changePermission/write so I can update objects + # in the dev environment + if (env_get() == "development") { + sysmeta <- datapack::addAccessRule(sysmeta, env_load(skip_mn = TRUE)$submitter, "changePermission") + } + + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "read") + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "write") + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "changePermission") + + sysmeta +} + + +#' Add access to the given System Metadata for the arctic-data-admins group #' -#' High-level utility functions for getting and setting access rules for DataONE -#' objects. +#' @param sysmeta (sysmeta) System Metadata object. +#' +#' @noRd +add_admin_group_access <- function(sysmeta) { + if (!inherits(sysmeta, "SystemMetadata")) { + message(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) + return(sysmeta) + } + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "read") + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "write") + sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "changePermission") + + sysmeta +} -#' Set the rightsHolder field for a given PID. + +#' Set the rights holder for an object +#' +#' Set the rights holder to the given subject for the given objects on the +#' given Member Node. This function checks if the rights holder is already set +#' and only updates the System Metadata when a change is needed. #' -#' Update the rights holder to the provided subject for the object identified in -#' the provided system metadata document on the given Member Node. +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to set the rights holder for. 
+#' @param subject (character) The identifier of the new rights holder, typically an ORCID or DN. #' -#' @param mn (MNode) The MNode instance to be changed. -#' @param pids (character) The identifiers for the object to be changed. -#' @param subject (character) The identifier of the new rightsHolder, often an ORCID or DN. +#' @return (logical) Whether an update was needed. #' #' @import dataone #' @import datapack +#' #' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_rights_holder(mn, pids, subject = "http://orcid.org/0000-000X-XXXX-XXXX") +#'} set_rights_holder <- function(mn, pids, subject) { - stopifnot(class(mn) == "MNode") - stopifnot(is.character(pids), - all(nchar(pids) > 0)) - stopifnot(is.character(subject), - nchar(subject) > 0) + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))) { + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (!all(is.character(subject), + nchar(subject) > 0)) { + stop("Argument 'subject' must be character class with non-zero number of characters.") + } + + if (grepl("^https:\\/\\/orcid\\.org", subject)) { + subject <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subject) + message("Subject contains https, transforming to http") + } + result <- vector(mode = "logical", length = length(pids)) @@ -29,15 +101,26 @@ set_rights_holder <- function(mn, pids, subject) { pid <- pids[i] # Get System Metadata - sysmeta <- dataone::getSystemMetadata(mn, pid) + sysmeta <- tryCatch({ + dataone::getSystemMetadata(mn, pid) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + 
message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + message(e) + e + }) + + if (!inherits(sysmeta, "SystemMetadata")) { + stop("Failed to get System Metadata.") + } # Change rightsHolder (if needed) if (sysmeta@rightsHolder == subject) { - log_message(paste0("rightsHolder field is already set to ", subject, ". System Metadata not updated.")) result[i] <- TRUE } else { # Update System Metadata - log_message(paste0("Updating rightsHolder for PID ", pid, " from ", sysmeta@rightsHolder, " to ", subject, ".")) sysmeta@rightsHolder <- subject @@ -47,7 +130,7 @@ set_rights_holder <- function(mn, pids, subject) { sysmeta = sysmeta) }, error = function(e) { - log_message(e) + message(e) e }) @@ -63,34 +146,75 @@ set_rights_holder <- function(mn, pids, subject) { } -#' Set the access policy for a set of objects. +#' Set the access policy for an object #' -#' For each permission, this function checks if the permission is already set -#' and moves on. System Metadata are only updated when a change was needed. +#' Set the access policy for the given subjects for the given objects on the given Member Node. +#' For each type of permission, this function checks if the permission is already set +#' and only updates the System Metadata when a change is needed. #' #' @param mn (MNode) The Member Node. -#' @param pids (character) The object(s) to set the permissions on. -#' @param subjects (character) The subject(s) to set permissions for. -#' @param permissions (character) Optional. Vector of permissions. +#' @param pids (character) The PIDs of the objects to set permissions for. +#' @param subjects (character) The identifiers of the subjects to set permissions for, typically an ORCID or DN. +#' @param permissions (character) Optional. The permissions to set. Defaults to +#' read, write, and changePermission. +#' +#' @return (logical) Whether an update was needed. 
#' -#' @return (logical) Named #' @export #' #' @examples -set_access <- function(mn, pids, subjects, permissions=c("read", "write", "changePermission")) { - stopifnot(is(mn, "MNode")) - stopifnot(is.character(pids), - nchar(pids) > 0) - stopifnot(is.character(subjects), - nchar(subjects) > 0) - stopifnot(all(permissions %in% c("read", "write", "changePermission"))) +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", +#' permissions = c("read", "write", "changePermission")) +#'} +set_access <- function(mn, pids, subjects, permissions = c("read", "write", "changePermission")) { + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))) { + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (!all(is.character(subjects), + all(nchar(subjects)) > 0)) { + stop("Argument 'subjects' must be character class with non-zero number of characters.") + } + + if (grepl("^https:\\/\\/orcid\\.org", subjects)) { + subjects <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subjects) + message("Subject contains https, transforming to http") + } + + if (!all(permissions %in% c("read", "write", "changePermission"))) { + stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") + } + result <- c() for (pid in pids) { changed <- FALSE - sysmeta <- dataone::getSystemMetadata(mn, pid) + sysmeta <- tryCatch({ + dataone::getSystemMetadata(mn, pid) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + message(paste0("Failed to get System Metadata for PID '", pid, 
"'\non MN '", mn@endpoint, "'.\n")) + message(e) + e + }) + + if (!inherits(sysmeta, "SystemMetadata")) { + stop("Failed to get System Metadata.") + } for (subject in subjects) { for (permission in permissions) { @@ -103,10 +227,10 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang if (changed) { result[pid] <- TRUE - log_message(paste0("Updating System Metadata for ", pid, ".")) + message(paste0("Updating System Metadata for ", pid, ".")) dataone::updateSystemMetadata(mn, pid, sysmeta) } else { - log_message(paste0("No changes needed for ", pid, ". Not updating.")) + message(paste0("No changes needed for ", pid, ". Not updating.")) result[pid] <- FALSE } } @@ -117,109 +241,150 @@ set_access <- function(mn, pids, subjects, permissions=c("read", "write", "chang result } - -#' Set public access on a set of objects. +#' Remove a subject from an object's access policy #' -#' @param mn (MNode) -#' @param pids (character) A vector of PIDs to set public access on -#' -#' @return -#' @export +#' Remove the given subjects from the access policy for the given objects on the given Member Node. +#' For each type of permission, this function checks if the permission is already set +#' and only updates the System Metadata when a change is needed. #' -#' @examples -set_public_read <- function(mn, pids) { - set_access(mn, pids, "public", "read") -} - -#' Remove public access on a set of objects. +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to set permissions for. +#' @param subjects (character) The identifiers of the subjects to set permissions for, typically an ORCID or DN. +#' @param permissions (character) Optional. The permissions to set. Defaults to +#' read, write, and changePermission. #' -#' @param mn (MNode) -#' @param pids (character) A vector of PIDs to set public access on +#' @return (logical) Whether an update was needed. 
#' -#' @return #' @export #' #' @examples -remove_public_read <- function(mn, pids) { - stopifnot(class(mn) == "MNode", - all(is.character(pids)), - all(nchar(pids) > 0)) +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' remove_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", +#' permissions = c("read", "write", "changePermission")) +#'} +remove_access <- function(mn, pids, subjects, permissions = c("read", "write", "changePermission")) { + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } - # Store the results of each attempted update - results <- c() + if (!all(is.character(pids), + all(nchar(pids) > 0))) { + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (!all(is.character(subjects), + all(nchar(subjects)) > 0)) { + stop("Argument 'subjects' must be character class with non-zero number of characters.") + } + + if (grepl("^https:\\/\\/orcid\\.org", subjects)) { + subjects <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subjects) + message("Subject contains https, transforming to http") + } + + if (!all(permissions %in% c("read", "write", "changePermission"))) { + stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") + } + + + result <- c() - # Remove public access for each PID for (pid in pids) { + changed <- FALSE + sysmeta <- tryCatch({ dataone::getSystemMetadata(mn, pid) - }, - error = function(e) { - log_message(paste0("Failed to get system metadata for PID '", pid, "' on MN '", mn@endpoint, "'.\n")) - log_message(e) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + message(paste0("Failed to get System Metadata for PID 
'", pid, "'\non MN '", mn@endpoint, "'.\n")) + message(e) e }) - if (inherits(sysmeta, "error")) { + if (!inherits(sysmeta, "SystemMetadata")) { stop("Failed to get System Metadata.") } - # Track whether we have changed the record to avoid an uncessary update call - changed <- FALSE - - if (!datapack::hasAccessRule(sysmeta, "public", "read")) { - log_message(paste0("Skipping setting public read because ", pid, " is not public.")) - next + for (subject in subjects) { + for (permission in permissions) { + if (datapack::hasAccessRule(sysmeta, subject, permission)) { + sysmeta <- datapack::removeAccessRule(sysmeta, subject, permission) + changed <- TRUE + } + } } - changed <- TRUE - - log_message(paste0("Removing public read access on ", pid, ".")) - sysmeta@accessPolicy <- sysmeta@accessPolicy[!(grepl("public", sysmeta@accessPolicy$subject) & grepl("read", sysmeta@accessPolicy$permission)),] - - # Update the sysmeta - update_response <- tryCatch({ + if (changed) { + result[pid] <- TRUE + message(paste0("Updating System Metadata for ", pid, ".")) dataone::updateSystemMetadata(mn, pid, sysmeta) - }, - error = function(e) { - log_message(paste0("Failed to update System Metadata for PID '", pid, "'.\n")) - log_message(e) - e - }) - - if (inherits(update_response, "error")) { - stop("Failed update.") + } else { + message(paste0("No changes needed for ", pid, ". Not updating.")) + result[pid] <- FALSE } - - # Save the result for this PID - results[pid] <- changed } - results + # Name the result vector + names(result) <- pids + + result } -#' Set the given subject as the rightsHolder and subject with write and -#' changePermission access for the given PID. +#' Set rights holder with access policy for an object +#' +#' Set the given subject as the rights holder and with given permissions +#' for the given objects. This function only updates the existing +#' System Metadata when a change is needed. 
#' -#' This function only updates the existing System Metadata if a change is -#' needed. +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to set the rights holder and access policy for. +#' @param subject (character) The identifier of the new rights holder, typically an ORCID or DN. +#' @param permissions (character) Optional. The permissions to set. Defaults to +#' read, write, and changePermission. #' -#' @param mn (MNode) The Member Node to send the query. -#' @param pids (character) The PID(s) to set the access rule for. -#' @param subject (character)The subject of the rule(s). -#' @param permissions (character) The permissions for the rule. Defaults to -#' read, write, and changePermission. +#' @return (logical) Whether an update was needed. #' -#' @return Whether an update was needed. #' @export #' #' @examples -set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "write", "changePermission")) { - stopifnot(class(mn) == "MNode", - all(is.character(pids)), - all(nchar(pids) > 0), - is.character(subject), - is.character(permissions)) +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", +#' permissions = c("read", "write", "changePermission")) +#'} +set_rights_and_access <- function(mn, pids, subject, permissions = c("read", "write", "changePermission")) { + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))) { + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (!all(is.character(subject), + nchar(subject) > 0)) { + stop("Argument 'subject' must be character class with non-zero number of characters.") + } + + if 
(grepl("^https:\\/\\/orcid\\.org", subject)) { + subject <- gsub("^https:\\/\\/orcid\\.org", "http:\\/\\/orcid\\.org", subject) + message("Subject contains https, transforming to http") + } + + if (!all(permissions %in% c("read", "write", "changePermission"))) { + stop("Argument 'permissions' must be one or more of: 'read', 'write', 'changePermission'") + } # Store the results of each attempted update results <- c() @@ -228,51 +393,53 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ for (pid in pids) { sysmeta <- tryCatch({ dataone::getSystemMetadata(mn, pid) - }, - error = function(e) { - log_message(paste0("Failed to get system metadata for PID '", pid, "' on MN '", mn@endpoint, "'.\n")) - log_message(e) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + message(e) e }) - if (inherits(sysmeta, "error")) { + if (!inherits(sysmeta, "SystemMetadata")) { stop("Failed to get System Metadata.") } - # Track whether we have changed the record to avoid an uncessary update call + # Track whether we have changed the record to avoid an unnecessary update call changed <- FALSE # Set rights holder if needed if (subject != sysmeta@rightsHolder) { changed <- TRUE - log_message(paste0("Setting rights holder to ", subject, ".")) + message(paste0("Setting rights holder to ", subject, ".")) sysmeta@rightsHolder <- subject } else { - log_message(paste0("Skipping setting rightsHolder as rightsHolder is already ", sysmeta@rightsHolder, ".\n")) + message(paste0("Skipping setting rightsHolder as rightsHolder is already ", sysmeta@rightsHolder, ".\n")) } for (permission in permissions) { if (datapack::hasAccessRule(sysmeta, subject, permission)) { - log_message(paste0("Skipping the addition of permission '", permission, "' for subject '", 
subject, "'\n")) + message(paste0("Skipping the addition of permission '", permission, "' for subject '", subject, "'\n")) next } changed <- TRUE - log_message(paste0("Adding permission '", permission, "' for subject '", subject, "'\n")) + message(paste0("Adding permission '", permission, "' for subject '", subject, "'\n")) sysmeta <- datapack::addAccessRule(sysmeta, subject, permission) } if (changed == TRUE) { - log_message(paste0("Updating System Metadata for ", pid, ".")) + message(paste0("Updating System Metadata for ", pid, ".")) update_response <- tryCatch({ dataone::updateSystemMetadata(mn, pid, sysmeta) }, error = function(e) { - log_message(paste0("Failed to update System Metadata for PID '", pid, "'.\n")) - log_message(e) + message(paste0("Failed to update System Metadata for PID '", pid, "'.\n")) + message(e) e }) @@ -280,7 +447,114 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ stop("Failed update.") } } else { - log_message(paste0("No changes needed for ", pid, ".")) + message(paste0("No changes needed for ", pid, ".")) + } + + # Save the result for this PID + results[pid] <- changed + } + + results +} + + +#' Set public read access for an object +#' +#' Set public read access for an object. +#' +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to set public read access for. +#' +#' @return (logical) Whether an update was needed. +#' +#' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' set_public_read(mn, pids) +#'} +set_public_read <- function(mn, pids) { + set_access(mn, pids, "public", "read") +} + + +#' Remove public read access for an object +#' +#' Remove public read access for an object. +#' +#' @param mn (MNode) The Member Node. 
+#' @param pids (character) The PIDs of the objects to remove public read access for. +#' +#' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' remove_public_read(mn, pids) +#'} +remove_public_read <- function(mn, pids) { + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))) { + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + + # Store the results of each attempted update + results <- c() + + # Remove public read access for each PID + for (pid in pids) { + sysmeta <- tryCatch({ + dataone::getSystemMetadata(mn, pid) + }, warning = function(w) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + w + }, error = function(e) { + message(paste0("Failed to get System Metadata for PID '", pid, "'\non MN '", mn@endpoint, "'.\n")) + message(e) + e + }) + + if (!inherits(sysmeta, "SystemMetadata")) { + stop("Failed to get System Metadata.") + } + + # Track whether we have changed the record to avoid an unnecessary update call + changed <- FALSE + + if (!datapack::hasAccessRule(sysmeta, "public", "read")) { + message(paste0("Skipping setting public read because ", pid, " is not public.")) + next + } + + changed <- TRUE + + message(paste0("Removing public read access on ", pid, ".")) + sysmeta@accessPolicy <- sysmeta@accessPolicy[!(grepl("public", sysmeta@accessPolicy$subject) & grepl("read", sysmeta@accessPolicy$permission)), ] + + # Update the sysmeta + update_response <- tryCatch({ + dataone::updateSystemMetadata(mn, pid, sysmeta) + }, + error = function(e) { + message(paste0("Failed to update System Metadata for PID '", pid, "'.\n")) + message(e) + e + }) + + if (inherits(update_response, 
"error")) { + stop("Failed update.") } # Save the result for this PID @@ -289,3 +563,62 @@ set_rights_and_access <- function(mn, pids, subject, permissions=c("read", "writ results } + + +#' Check whether an object has public read access +#' +#' Check whether objects have public read access. +#' No token needs to be set to use this function. +#' +#' @param mn (MNode) The Member Node. +#' @param pids (character) The PIDs of the objects to check for public read access. +#' @param use.names (logical) If `TRUE`, PIDs will +#' be used as names for the result unless PIDs have names already, in which case +#' those names will be used for the result. +#' +#' @return (logical) Whether an object has public read access. +#' +#' @importFrom httr content +#' +#' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' is_public_read(mn, pids) +#'} +is_public_read <- function(mn, pids, use.names = TRUE) { + if (!is(mn, "MNode")) { + stop(paste0("Argument 'mn' is not an MNode but was a ", class(mn), " instead.")) + } + + if (!all(is.character(pids), + all(nchar(pids) > 0))) { + stop("Argument 'pids' must be character class with non-zero number of characters.") + } + + if (!is.logical(use.names)) { + stop(paste0("Argument 'use.names' must be logical class, but was a ", class(use.names), " instead.")) + } + + vapply(pids, USE.NAMES = use.names, FUN.VALUE = logical(1), FUN = function(pid) { + + url <- paste(mn@endpoint, "meta", utils::URLencode(pid, reserved = TRUE), sep = "/") + response <- dataone:::auth_get(url, node = mn) + + if (response$status_code != "200") { + error_desc <- dataone:::getErrorDescription(response) + if (grepl("READ not allowed", error_desc, ignore.case = TRUE)) { + return(FALSE) + } else { + stop(error_desc) + } + } + + sysmeta <- 
datapack::SystemMetadata(XML::xmlRoot(suppressMessages(XML::xmlParse((httr::content(response, as = "text")))))) + return(datapack::hasAccessRule(sysmeta, "public", "read")) + }) +} diff --git a/R/arcticdatautils.R b/R/arcticdatautils.R new file mode 100644 index 0000000..0dd2de0 --- /dev/null +++ b/R/arcticdatautils.R @@ -0,0 +1,15 @@ +#' arcticdatautils: Utilities for the Arctic Data Center +#' +#' This package contains code for doing lots of useful stuff that's too specific for the +#' dataone package, primarily functions that streamline Arctic Data Center operations. +#' +#' @docType package +#' @name arcticdatautils +#' +#' @import dataone +#' @import datapack +#' @import EML +#' @importFrom methods as is new slot slot<- +#' @importFrom stats na.omit +#' @importFrom utils URLencode head read.csv read.delim setTxtProgressBar txtProgressBar +NULL diff --git a/R/attributes.R b/R/attributes.R index 8042a40..f2d77bc 100644 --- a/R/attributes.R +++ b/R/attributes.R @@ -1,8 +1,14 @@ +# Functions for editing metadata attributes + + #' Get a data.frame of attributes from a NetCDF object #' -#' @param nc (ncdf4 or character) Either a ncdf4 object or a file path +#' Get a data.frame of attributes from a NetCDF object. +#' +#' @param nc (ncdf4/character) Either a ncdf4 object or a file path. +#' +#' @return (data.frame) A data.frame of the attributes. #' -#' @return (data.frame) A data.frame of the attributes #' @export #' #' @examples @@ -12,6 +18,12 @@ get_ncdf4_attributes <- function(nc) { stopifnot(is(nc, "ncdf4") || file.exists(nc)) + if (!requireNamespace("ncdf4")) { + stop(call. = FALSE, + "The package 'ncdf4' must be installed to run this function. 
", + "Please install it and try again.") + } + # Read the file in if `nc` is a character vector if (is.character(nc)) { nc <- ncdf4::nc_open(nc) @@ -20,7 +32,7 @@ get_ncdf4_attributes <- function(nc) { unitlist <- c() - for (i in 1:length(dims)){ + for (i in 1:length(dims)) { unitlist[i] <- dims[[i]]$units } inds <- which(unitlist != '') @@ -29,7 +41,7 @@ get_ncdf4_attributes <- function(nc) { attributes <- c(names(nc$var), attributes(dims)$names) - result <- data.frame(attributeName=NA) + result <- data.frame(attributeName = NA) for (i in seq_along(attributes)) { result[i,"attributeName"] <- attributes[i] @@ -43,4 +55,3 @@ get_ncdf4_attributes <- function(nc) { result } - diff --git a/R/dataone.R b/R/dataone.R index 5475d0b..3a6cb77 100644 --- a/R/dataone.R +++ b/R/dataone.R @@ -1,15 +1,22 @@ -#' dataone.R -#' -#' Helpers for the DataONE R package. +# Helper functions for the dataone package + +#' Test whether a token is set +#' #' Test whether a token is set. #' -#' @param node (MNode|CNode) The CN or MN you want to find a token for. +#' @param node (MNode/CNode) The Member/Coordinating Node to query. +#' +#' @return (logical) #' -#' @return (boolean) #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'is_token_set(mn) +#'} is_token_set <- function(node) { token <- tryCatch(get_token(node), error = function(e) FALSE) @@ -22,14 +29,22 @@ is_token_set <- function(node) { } -#' Gets the currently set authentication token. +#' Get the currently set authentication token #' -#' @param node (MNode|CNode) The CN or MN you want to find a token for. +#' Get the currently set authentication token. +#' +#' @param node (MNode/CNode) The Member/Coordinating Node to query. #' #' @return (character) The token. 
+#' #' @export #' #' @examples +#'\dontrun{ +#'cn <- CNode('STAGING2') +#'mn <- getMNode(cn,"urn:node:mnTestKNB") +#'get_token(mn) +#'} get_token <- function(node) { if (!(class(node) %in% c("MNode", "CNode"))) { stop(paste0("Node must be an MNode or CNode. You passed in a '", class(node), "'.")) @@ -49,12 +64,22 @@ get_token <- function(node) { } +#' Determine whether token is expired +#' #' Determine whether the set token is expired. #' -#' @return +#' @param node (MNode/CNode) The Member/Coordinating Node to query. +#' +#' @return (logical) +#' #' @export #' #' @examples +#' \dontrun{ +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' is_token_expired(mn) +#' } is_token_expired <- function(node) { token_name <- ifelse(node@env == "prod", "dataone_token", "dataone_test_token") @@ -87,31 +112,52 @@ is_token_expired <- function(node) { } -#' Get the base URL of the Member Node. +#' Get base URL of a Member Node +#' +#' Get the base URL of a Member Node. #' -#' @param mn +#' @param mn (MNode/character) The Member Node, or its base URL as a character string. +#' +#' @return (character) The URL. #' -#' @return #' @export #' #' @examples +#'\dontrun{ +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' url <- get_mn_base_url(mn) +#'} get_mn_base_url <- function(mn) { # Determine MN URL. Accept either an MNode or a character string - if (class(mn) == "MNode") { - mn_base_url <- mn$base_url + if (is(mn, "MNode")) { + mn_base_url <- mn@base_url } mn_base_url <- mn } +#' Check if user has authorization to perform an action on an object +#' #' Check if the user has authorization to perform an action on an object. #' -#' @param node (MNode|CNode) The Node to query. +#' @param node (MNode/CNode) The Member/Coordinating Node to query. #' @param ids (character) The PID or SID to check. #' @param action (character) One of read, write, or changePermission. 
#' +#' @return (logical) +#' #' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' is_authorized(mn, pids, "write") +#'} is_authorized <- function(node, ids, action) { stopifnot(class(node) %in% c("MNode", "CNode")) stopifnot(is.character(ids)) diff --git a/R/dataone_formats.R b/R/dataone_formats.R deleted file mode 100644 index af87ae7..0000000 --- a/R/dataone_formats.R +++ /dev/null @@ -1,41 +0,0 @@ -#' dataone_formats.R -#' -#' A set of thin functions which return the DataONE format ID string. These are -#' to aid in filling in function arguments and can't remember or don't want to -#' type in the full format ID. By putting these format ID strings into -#' functions, a user's autocompletion routine in their editor can help them -#' fill in the format ID they want. - -#' Helper function to generate the ISO 19139 format ID.w -#' -#' @return (character) The format ID for ISO 19139. -#' @export -#' -#' @examples -#' format_iso() -#' \dontrun{ -#' # Upload a local ISO19139 XML file: -#' env <- env_load() -#' publish_object(env$mn, "path_to_some_EML_file", format_iso()) -#' } -format_iso <- function() { - "http://www.isotc211.org/2005/gmd" -} - - -#' Helper function to generate the EML 2.1.1 format ID. -#' -#' @return (character) The format ID for EML 2.1.1. -#' @export -#' -#' @examples -#' format_eml -#' -#' \dontrun{ -#' # Upload a local EML 2.1.1 file: -#' env <- env_load() -#' publish_object(env$mn, "path_to_some_EML_file", format_eml()) -#' } -format_eml <- function() { - "eml://ecoinformatics.org/eml-2.1.1" -} diff --git a/R/editing.R b/R/editing.R index f6cf38e..2fd0e29 100644 --- a/R/editing.R +++ b/R/editing.R @@ -1,36 +1,47 @@ -#' editing.R -#' -#' High-level functions for managing content. 
+# High-level functions for managing content -#' Publish an object on a member node +#' Publish an object on a Member Node #' -#' Use sensible defaults to publish an object on a member node. If identifier is provided, -#' use it, otherwise generate a UUID. If clone_id is provided, then retrieve the +#' Use sensible defaults to publish an object on a Member Node. If identifier is provided, +#' use it, otherwise generate a UUID. If clone_id is provided, then retrieve the #' system metadata for that identifier and use it to provide rightsHolder, accessPolicy, #' and replicationPolicy metadata. Note that this function only uploads the object to #' the Member Node, and does not add it to a data package, which can be done separately. #' #' @param mn (MNode) The Member Node to publish the object to. -#' @param path the path to the file to be published -#' @param format_id (character) Optional. The format ID to set for the object. When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. +#' @param path (character) The path to the file to be published. +#' @param format_id (character) Optional. The format ID to set for the object. +#' When not set, [guess_format_id()] will be used to guess the format ID. +#' Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. #' @param pid (character) Optional. The PID to use with the object. #' @param sid (character) Optional. The SID to use with the new object. -#' @param clone_pid (character) PID of objet to clone System Metadata from +#' @param clone_pid (character) PID of object to clone System Metadata from. +#' @param public (logical) Whether object should be given public read access. +#' +#' @return pid (character) The PID of the published object. 
#' #' @import dataone #' @import datapack #' #' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' my_path <- "/home/Documents/myfile.csv" +#' pid <- publish_object(mn, path = my_path, format_id = "text/csv", public = FALSE) +#'} publish_object <- function(mn, path, - format_id=NULL, - pid=NULL, - sid=NULL, - clone_pid=NULL, - public=TRUE) { + format_id = NULL, + pid = NULL, + sid = NULL, + clone_pid = NULL, + public = TRUE) { - stopifnot(class(mn) == "MNode") + stopifnot(is(mn, "MNode")) stopifnot(file.exists(path)) # Decide the format_id @@ -44,11 +55,7 @@ publish_object <- function(mn, warning(paste0("No format_id was specified so a guess was made based upon the file extension: ", format_id, ".")) } - # Check if format ID is valid - if (!(format_id %in% D1_FORMATS)) { - stop(call. = FALSE, - paste0("The format_id of '", format_id, "' is not a valid format ID. See https://cn.dataone.org/cn/v2/formats for the current list. 
This package stores a copy and may be out of date with that list so please email the author if needed.")) - } + check_format(format_id) # Set up some variables for use later on ######################################## @@ -56,7 +63,7 @@ publish_object <- function(mn, # Get the clone_pid sysmeta to use for the rightsHolder and accessPolicy, and replicationPolicy if (!is.null(clone_pid)) { - log_message(paste0("Cloning System Metadata for new object from ", clone_pid, ".")) + message(paste0("Cloning System Metadata for new object from ", clone_pid, ".")) clone_sysmeta <- dataone::getSystemMetadata(mn, clone_pid) } @@ -80,7 +87,7 @@ publish_object <- function(mn, sysmeta <- clear_replication_policy(sysmeta) if (!is.null(sid)) { - log_message(paste0("Setting SID to '", sid, "'.")) + message(paste0("Setting SID to '", sid, "'.")) sysmeta@seriesId <- sid } @@ -97,8 +104,10 @@ publish_object <- function(mn, } sysmeta <- add_admin_group_access(sysmeta) - sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") - sysmeta@fileName <- basename(path) + if (public == TRUE) { + sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") + } + sysmeta@fileName <- reformat_file_name(basename(path), sysmeta) dataone::createObject(mn, pid = pid, @@ -106,22 +115,36 @@ publish_object <- function(mn, sysmeta = sysmeta) } -#' Update an object with a new file. + +#' Update an object with a new file #' -#' This is a convenience wrapper around `dataone::updateObject` which copies in +#' This is a convenience wrapper around [dataone::updateObject()] which copies in #' fields from the old object's System Metadata such as the rightsHolder and #' accessPolicy and updates only what needs to be changed. #' #' @param mn (MNode) The Member Node to update the object on. #' @param pid (character) The PID of the object to update. #' @param path (character) The full path to the file to update with. -#' @param format_id (character) Optional. The format ID to set for the object. 
When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. +#' @param format_id (character) Optional. The format ID to set for the object. +#' When not set, [guess_format_id()] will be used to guess the format ID. +#' Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}. +#' @param new_pid (character) Optional. Specify the PID for the new object. +#' Defaults to automatically generating a new, random UUID-style PID. +#' @param sid (character) Optional. Specify a Series ID (SID) to use for the new object. #' #' @return (character) The PID of the updated object. +#' #' @export #' #' @examples -update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) { +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' my_path <- "/home/Documents/myfile.csv" +#' new_pid <- update_object(mn, pid, my_path, format_id = "text/csv") +#'} +update_object <- function(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL) { stopifnot(is(mn, "MNode")) stopifnot(object_exists(mn, pid)) stopifnot(file.exists(path)) @@ -137,13 +160,9 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) warning(paste0("No format_id was specified so a guess was made based upon the file extension: ", format_id, ".")) } - # Check if format ID is valid - if (!(format_id %in% D1_FORMATS)) { - stop(call. = FALSE, - paste0("The format_id of '", format_id, "' is not a valid format ID. See https://cn.dataone.org/cn/v2/formats for the current list. 
This package stores a copy and may be out of date with that list so please email the author if needed.")) - } + check_format(format_id) - log_message(paste0("Updating object ", pid, " with the file at ", path, ".")) + message(paste0("Updating object ", pid, " with the file at ", path, ".")) # Generate a PID if needed if (is.null(new_pid)) { @@ -165,7 +184,20 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) sysmeta@checksumAlgorithm <- "SHA1" slot(sysmeta, "obsoletes", check = FALSE) <- NA slot(sysmeta, "obsoletedBy", check = FALSE) <- NA - sysmeta@fileName <- basename(path) + sysmeta@fileName <- reformat_file_name(basename(path), sysmeta) + + # Set the replication policy back to default + sysmeta <- clear_replication_policy(sysmeta) + + # Add packageId to metadata if the object is an xml file + if (grepl("^eml:\\/\\/ecoinformatics.org\\/eml|^https://eml.ecoinformatics.org", format_id)) { + doc <- EML::read_eml(path) + doc$packageId <- new_pid + path <- tempfile() + EML::write_eml(doc, path) + # File changed - update checksum + sysmeta@checksum <- digest::digest(path, algo = "sha1", serialize = FALSE, file = TRUE) + } # Make the update dataone::updateObject(mn, @@ -176,7 +208,9 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) } -#' Publish an updated data package. +#' Publish an updated data package +#' +#' Publish an update to a data package after updating data files or metadata. #' #' This function can be used for a variety of tasks: #' @@ -189,11 +223,11 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' The metadata_pid and resource_map_pid provide the identifier of an EML metadata #' document and associated resource map, and the data_pids vector provides a list #' of PIDs of data objects in the package. 
Update the metadata file and resource map -#' by generating a new identifier (a DOI if use_doi is TRUE) and updating the Member +#' by generating a new identifier (a DOI if `use_doi = TRUE`) and updating the Member #' Node with a public version of the object. If metadata_file is not missing, it #' should be an edited version of the metadata to be used to update the original. If #' parent_resmap_pid is not missing, it indicates the PID of a parent package that -#' should be updated as well, using the parent_medata_pid, parent_data_pids, and +#' should be updated as well, using the parent_metadata_pid, parent_data_pids, and #' parent_child_pids as members of the updated package. In all cases, the objects #' are made publicly readable. #' @@ -204,46 +238,117 @@ update_object <- function(mn, pid, path, format_id=NULL, new_pid=NULL, sid=NULL) #' @param identifier (character) Manually specify the identifier for the new metadata object. #' @param use_doi (logical) Generate and use a DOI as the identifier for the updated metadata object. #' @param parent_resmap_pid (character) Optional. PID of a parent package to be updated. +#' Not optional if a parent package exists. #' @param parent_metadata_pid (character) Optional. Identifier for the metadata document of the parent package. +#' Not optional if a parent package exists. #' @param parent_data_pids (character) Optional. Identifier for the data objects of the parent package. +#' Not optional if the parent package contains data objects. #' @param parent_child_pids (character) Optional. Resource map identifier(s) of child packages in the parent package. +#' \code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages. #' @param child_pids (character) Optional. Child packages resource map PIDs. -#' @param metadata_path (character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used. 
-#' @param public (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). -#' This applies to the new metadata PID and its resource map and data object. -#' access policies are not affected. -#' @param check_first (logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements. -#' @param parent_data_pids +#' @param metadata_path (character or eml) Optional. An eml class object or a path to a metadata file to update with. +#' If this is not set, the existing metadata document will be used. +#' @param public (logical) Optional. Make the update public. If `FALSE`, will set the metadata and resource map to private (but not the data objects). +#' This applies to the new metadata PID and its resource map and data object. +#' access policies are not affected. +#' @param check_first (logical) Optional. Whether to check the PIDs passed in as arguments exist on the MN before continuing. +#' Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements. +#' @param format_id (character) Optional. When omitted, the updated object will have the same formatId as `metadata_pid`. If set, will attempt +#' to use the value instead. +#' @param keep_prov (logical) Option to force publish_update to keep prov +#' +#' @return (character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects. 
#' #' @import dataone #' @import datapack #' @import EML #' #' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' +#' meta_path <- "/home/Documents/myMetadata.xml" +#' +#' publish_update(mn, meta_pid, rm_pid, data_pids, meta_path, public = TRUE) +#'} publish_update <- function(mn, metadata_pid, resource_map_pid, - data_pids=NULL, - child_pids=NULL, - metadata_path=NULL, - identifier=NULL, - use_doi=FALSE, - parent_resmap_pid=NULL, - parent_metadata_pid=NULL, - parent_data_pids=NULL, - parent_child_pids=NULL, - public=TRUE, - check_first=TRUE) { + data_pids = NULL, + child_pids = NULL, + metadata_path = NULL, + identifier = NULL, + use_doi = FALSE, + parent_resmap_pid = NULL, + parent_metadata_pid = NULL, + parent_data_pids = NULL, + parent_child_pids = NULL, + public = TRUE, + check_first = TRUE, + format_id = NULL, + keep_prov = FALSE) { # Don't allow setting a dataset to private when it uses a DOI if (use_doi && !public) { - stop("You cannot use a DOI and set public=FALSE as the same time.") + stop("You cannot use a DOI and set 'public = FALSE' at the same time.") } # Do a simple sanity check on the PIDs passed in + stopifnot(is.character(metadata_pid)) + stopifnot(is.character(resource_map_pid)) + + if (!is.null(data_pids)) { + stopifnot(all(is.character(data_pids))) + } + + if (!is.null(child_pids)) { + stopifnot(all(is.character(child_pids))) + } + + if (!is.null(identifier)) { + stopifnot(is.character(identifier)) + } + + if (!is.null(parent_resmap_pid)) { + stopifnot(is.character(parent_resmap_pid)) + } + + if (!is.null(parent_metadata_pid)) { + stopifnot(is.character(parent_metadata_pid)) + } + + if (!is.null(parent_data_pids)) { + 
stopifnot(all(is.character(parent_data_pids))) + } + + if (!is.null(parent_child_pids)) { + stopifnot(all(is.character(parent_child_pids))) + } + + if (!is.null(format_id)) { + stopifnot(is.character(format_id) && nchar(format_id) > 0) + } + + # Check to see if the obsoleted package is in the list of parent_child_pids + # If it is, notify the user and remove it from the list + if (resource_map_pid %in% parent_child_pids) { + message("Removing the old resource map from the list of child PIDs in the parent package.") + resource_map_pid_index <- which(resource_map_pid == parent_child_pids) + parent_child_pids <- parent_child_pids[-resource_map_pid_index] + } + all_pids <- c(metadata_pid, resource_map_pid, data_pids, child_pids, identifier, parent_resmap_pid, parent_metadata_pid, parent_data_pids, parent_child_pids) + duped <- duplicated(all_pids) if (any(duped)) { @@ -275,6 +380,17 @@ publish_update <- function(mn, stopifnot(object_exists(mn, parent_data_pids)) if (!is.null(parent_child_pids)) stopifnot(object_exists(mn, parent_child_pids)) + # Check for obsoleted metadata_pid + meta_obsoletedBy <- dataone::getSystemMetadata(mn, metadata_pid)@obsoletedBy + if (!is.na(meta_obsoletedBy)) { + stop("The value passed in for the argument 'metadata_pid' of '", metadata_pid, "' is already obsoleted by a newer version with PID '", meta_obsoletedBy, "'. All PID arguments to publish_update should be the latest versions of each object series.") + } + } + + # Check for obsoleted resource_map_pid. The resource map and metadata can become disassociated without this check. + rm_obsoletedBy <- dataone::getSystemMetadata(mn, resource_map_pid)@obsoletedBy + if (!is.na(rm_obsoletedBy)) { + stop("The value passed in for the argument 'resource_map_pid' of '", resource_map_pid, "' is already obsoleted by a newer version with PID '", rm_obsoletedBy, "'. 
All PID arguments to publish_update should be the latest versions of each object series.") } # Prepare the response object @@ -286,35 +402,42 @@ publish_update <- function(mn, # Get some things from the node if (is.null(metadata_path)) { # Get the metadata doc - log_message("Getting metadata from the MN.") - eml <- EML::read_eml(rawToChar(dataone::getObject(mn, metadata_pid)), asText = TRUE) + message("Getting metadata from the MN.") + doc <- EML::read_eml(dataone::getObject(mn, metadata_pid)) + + } else if (class(metadata_path)[1] == "emld") { + # If an eml object is provided, use it directly after validating + if (!eml_validate(metadata_path)) { + stop("The EML object is not valid.") + } + + doc <- metadata_path + } else { # Alternatively, read an edited metadata file from disk if provided if (!file.exists(metadata_path)) { stop(paste0("Metadata doesn't exist: ", metadata_path)) } - log_message(paste0("Getting metadata from the path: ", metadata_path, ".")) - eml <- EML::read_eml(metadata_path) + message(paste0("Getting metadata from the path: ", metadata_path, ".")) + doc <- EML::read_eml(metadata_path) } # get the metadata sysmeta from the node metadata_sysmeta <- dataone::getSystemMetadata(mn, metadata_pid) - log_message("Downloaded EML and sysmeta...") - # Generate PIDs for our updated objects if (is.null(identifier)) { if (use_doi) { - log_message("Minting a new DOI") + message("Minting a new DOI") metadata_updated_pid <- dataone::generateIdentifier(mn, scheme = "DOI") - log_message(paste0("Minted a new DOI of ", metadata_updated_pid, ".")) + message(paste0("Minted a new DOI of ", metadata_updated_pid, ".")) } else { metadata_updated_pid <- new_uuid() - log_message(paste0("Using generated UUID PID of ", metadata_updated_pid, ".")) + message(paste0("Using generated UUID PID of ", metadata_updated_pid, ".")) } } else { - log_message(paste0("Using manually-specified identifier of ", identifier, ".")) + message(paste0("Using manually-specified identifier of ", 
identifier, ".")) metadata_updated_pid <- identifier } @@ -324,22 +447,35 @@ publish_update <- function(mn, # Update the metadata # Replace packageId - eml@packageId <- new("xml_attribute", metadata_updated_pid) + doc$packageId <- metadata_updated_pid # Replace system if needed - if (eml@system != "https://arcticdata.io") { - eml@system <- new("xml_attribute", "https://arcticdata.io") + if (is.null(doc$system)) { + doc$system <- "https://search.dataone.org" + } + + # Replace access if needed + if (length(doc$access$allow) & (!is.null(metadata_path))) { + doc$access <- list() } # Write out the document to disk. We do this in part because # set_other_entities takes a path to the doc. eml_path <- tempfile() - EML::write_eml(eml, eml_path) + EML::write_eml(doc, eml_path) # Create System Metadata for the updated EML file + # First, figure out what formatId we should use on the new object + if (!is.null(format_id)) { + message("Overridding format ID on new metadata object of: ", format_id, " instead of ", metadata_sysmeta@formatId, ".") + metadata_updated_format_id <- format_id + } else { + metadata_updated_format_id <- metadata_sysmeta@formatId + } + metadata_updated_sysmeta <- new("SystemMetadata", identifier = metadata_updated_pid, - formatId = "eml://ecoinformatics.org/eml-2.1.1", + formatId = metadata_updated_format_id, size = file.size(eml_path), checksum = digest::digest(eml_path, algo = "sha1", serialize = FALSE, file = TRUE), checksumAlgorithm = "SHA1", @@ -348,21 +484,21 @@ publish_update <- function(mn, obsoletes = metadata_pid, fileName = metadata_sysmeta@fileName) - # Temporarily clear out the replication policy to work around NCEI not being - # Tier 4 MN - metadata_updated_sysmeta <- clear_replication_policy(metadata_updated_sysmeta) - # Set the SID if one existed on old metadata object if (!is.na(metadata_sysmeta@seriesId)) { metadata_updated_sysmeta@seriesId <- metadata_sysmeta@seriesId } + # Copy access and replication details from object we're updating 
metadata_updated_sysmeta@accessPolicy <- metadata_sysmeta@accessPolicy metadata_updated_sysmeta@replicationAllowed <- metadata_sysmeta@replicationAllowed metadata_updated_sysmeta@numberReplicas <- metadata_sysmeta@numberReplicas metadata_updated_sysmeta@preferredNodes <- metadata_sysmeta@preferredNodes metadata_updated_sysmeta@blockedNodes <- metadata_sysmeta@blockedNodes + # Set the replication information to the defaults + metadata_updated_sysmeta <- clear_replication_policy(metadata_updated_sysmeta) + if (public) { # Make the metadata public metadata_updated_sysmeta <- datapack::addAccessRule(metadata_updated_sysmeta, "public", "read") @@ -372,9 +508,12 @@ publish_update <- function(mn, set_public_read(mn, data_pid) } } else { - metadata_updated_sysmeta <- remove_public_access(metadata_updated_sysmeta) + metadata_updated_sysmeta <- datapack::removeAccessRule(metadata_updated_sysmeta, "public", "read") } + # Update fileName to follow ADC naming conventions + metadata_updated_sysmeta@fileName <- reformat_file_name(doc$dataset$title, metadata_updated_sysmeta) + set_rights_holder(mn, metadata_pid, me) dataone::updateObject(mn, @@ -393,7 +532,7 @@ publish_update <- function(mn, metadata_pid, metadata_sysmeta@rightsHolder) - log_message(paste0("Updated metadata document ", metadata_pid, " with ", metadata_updated_pid, ".")) + message(paste0("Updated metadata document ", metadata_pid, " with ", metadata_updated_pid, ".")) # Update the resource map ######################### @@ -404,11 +543,11 @@ publish_update <- function(mn, child_pids = child_pids, identifier = resmap_updated_pid, public = public, - check_first = check_first) + check_first = check_first, + keep_prov = keep_prov) set_rights_holder(mn, response[["resource_map"]], metadata_sysmeta@rightsHolder) - log_message("Updated resource map") # Update the parent resource map to add the new package ####################################################### @@ -417,12 +556,10 @@ publish_update <- function(mn, 
stop("Missing required parameters to update parent package.") } - log_message("Updating parent resource map...") - # Check to see if the just-updated package is in the list of # parent_child_pids, notify the user, and add it to the list if (!(resmap_updated_pid %in% parent_child_pids)) { - log_message("Adding the new resource map to the list of child PIDs in the parent package.") + message("Adding the new resource map to the list of child PIDs in the parent package.") parent_child_pids <- c(parent_child_pids, resmap_updated_pid) } @@ -432,7 +569,8 @@ publish_update <- function(mn, data_pids = parent_data_pids, child_pids = parent_child_pids, public = public, - check_first = check_first) + check_first = check_first, + keep_prov = keep_prov) set_rights_holder(mn, response[["parent_resource_map"]], metadata_sysmeta@rightsHolder) } @@ -444,42 +582,51 @@ publish_update <- function(mn, } -#' Create a resource map Object on a Member Node. +#' Create a resource map object on a Member Node #' #' This function first generates a new resource map RDF/XML document locally and -#' then uses the dataone::createObject function to create the Object on the +#' then uses the [dataone::createObject()] function to create the object on the #' specified MN. #' -#' If you only want to generate resource map RDF/XML, see -#' \code{\link{generate_resource_map}} +#' If you only want to generate resource map RDF/XML, see [generate_resource_map()]. #' #' @param mn (MNode) The Member Node -#' @param metadata_pid (character) The PID of the metadata object to go in the -#' package. -#' @param data_pids (character) The PID(s) of the data objects to go in the -#' package. +#' @param metadata_pid (character) The PID of the metadata object to go in the package. +#' @param data_pids (character) The PID(s) of the data objects to go in the package. #' @param child_pids (character) The resource map PIDs of the packages to be #' nested under the package. #' @param check_first (logical) Optional. 
Whether to check the PIDs passed in as -#' aruments exist on the MN before continuing. This speeds up the function, +#' arguments exist on the MN before continuing. This speeds up the function, #' especially when `data_pids` has many elements. +#' @param ... Additional arguments that can be passed into [publish_object()]. +#' +#' @return (character) The PID of the created resource map. #' -#' @return (character) The created resource map's PID #' @export #' #' @examples - +#'\dontrun{ +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' meta_pid <- 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe' +#' dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', +#' 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') +#' +#' create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid) +#'} create_resource_map <- function(mn, metadata_pid, - data_pids=NULL, - child_pids=NULL, - check_first=TRUE) { - stopifnot(class(mn) == "MNode") + data_pids = NULL, + child_pids = NULL, + check_first = TRUE, + ...) { + stopifnot(is(mn, "MNode")) stopifnot(is.character(metadata_pid), nchar(metadata_pid) > 0) if (check_first) { - log_message("Checking all the object passed in as arguments exist before going on...") + message("Checking all the object passed in as arguments exist before going on...") stopifnot(object_exists(mn, metadata_pid)) if (!is.null(data_pids)) @@ -499,7 +646,8 @@ create_resource_map <- function(mn, actual <- publish_object(mn, path, pid, - format_id = "http://www.openarchives.org/ore/terms") + format_id = "http://www.openarchives.org/ore/terms", + ...) stopifnot(pid == actual) @@ -507,63 +655,79 @@ create_resource_map <- function(mn, } -#' Update an existing resource map Object on a Member Node. 
+#' Update an existing resource map object on a Member Node #' #' This function first generates a new resource map RDF/XML document locally and -#' then uses the dataone::updateObject function to update an Object on the +#' then uses the [dataone::updateObject()] function to update an object on the #' specified MN. #' -#' If you only want to generate resource map RDF/XML, see -#' \code{\link{generate_resource_map}}. +#' If you only want to generate resource map RDF/XML, see [generate_resource_map()]. #' -#' This function also can be used to be used to add a new child packages to a -#' parent package. For exmaple, if you have: +#' This function also can be used to add a new child packages to a +#' parent package. For example, if you have: #' #' Parent A B #' -#' and want to add C as a sibling package to A and B, e.g. +#' and want to add C as a sibling package to A and B, e.g.: #' #' Parent A B C #' -#' you could use this function. +#' then you could use this function. #' -#' Note: This function currently replaces the rightsHolder on the Resource Map +#' Note: This function currently replaces the rightsHolder on the resource map #' temporarily to allow updating but sets it back to the rightsHolder that was #' in place before the update. #' -#' @param mn -#' @param metadata_pid -#' @param data_pids -#' @param child_pids -#' @param public Whether or not to make the new resource map public read -#' (logical) +#' @param mn (MNode) The Member Node. +#' @param metadata_pid (character) The PID of the metadata object to go in the package. +#' @param data_pids (character) The PID(s) of the data objects to go in the package. +#' @param child_pids (character) The resource map PIDs of the packages to be +#' nested under the package. +#' @param public (logical) Whether or not to make the new resource map public read. #' @param check_first (logical) Optional. Whether to check the PIDs passed in as -#' aruments exist on the MN before continuing. 
This speeds up the function, +#' arguments exist on the MN before continuing. This speeds up the function, #' especially when `data_pids` has many elements. -#' @param resource_map_pid -#' @param other_statements (data.frame) Extra statements to add to the Resource Map. -#' @param identifier +#' @param resource_map_pid (character) The PID of the resource map to be updated. +#' @param other_statements (data.frame) Extra statements to add to the resource map. +#' @param identifier (character) Manually specify the identifier for the new resource map. +#' @param keep_prov (logical) Option to force prov to be forwarded into the new resource map. +#' +#' @return (character) The PID of the updated resource map. #' #' @export +#' +#' @examples +#'\dontrun{ +#' cn <- CNode('STAGING2') +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' +#' rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids) +#'} update_resource_map <- function(mn, resource_map_pid, metadata_pid, - data_pids=NULL, - child_pids=NULL, - other_statements=NULL, - identifier=NULL, - public=FALSE, - check_first=TRUE) { + data_pids = NULL, + child_pids = NULL, + other_statements = NULL, + identifier = NULL, + public = TRUE, + check_first = TRUE, + keep_prov = FALSE) { # Check arguments - stopifnot(class(mn) == "MNode") + stopifnot(is(mn, "MNode")) stopifnot(is.character(resource_map_pid), nchar(resource_map_pid) > 0) stopifnot(is.character(metadata_pid), nchar(metadata_pid) > 0) if (check_first) { - log_message("Checking all the object passed in as arguments exist before going on...") + message("Checking all the object passed in as arguments exist before going on...") stopifnot(object_exists(mn, resource_map_pid)) stopifnot(object_exists(mn, 
metadata_pid)) @@ -578,7 +742,7 @@ update_resource_map <- function(mn, # Get the current rightsHolder sysmeta <- dataone::getSystemMetadata(mn, resource_map_pid) - stopifnot(class(sysmeta) == "SystemMetadata") + stopifnot(is(sysmeta, "SystemMetadata")) previous_rights_holder <- sysmeta@rightsHolder @@ -597,23 +761,60 @@ update_resource_map <- function(mn, other_statements) } + prov_pids <- gsub("https://cn-stage-2.test.dataone.org/cn/v[0-9]/resolve/|https://cn.dataone.org/cn/v[0-9]/resolve/|https://cn-stage.test.dataone.org/cn/v[0-9]/resolve/", + "", + c(statements$subject, statements$object)) %>% + gsub("%3A", ":", .) + prov_pids <- prov_pids[-(grep("^http", prov_pids))] %>% # might need to catch other things besides URLs + unique(.) + # Create the replacement resource map if (is.null(identifier)) { identifier <- paste0("resource_map_", new_uuid()) } - new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, + if (keep_prov == FALSE){ + if (is.null(prov_pids)){ + new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, + data_pids = data_pids, + child_pids = child_pids, + resource_map_pid = identifier) + } + else if (any(prov_pids %in% data_pids == FALSE)){ + warning("Old provenance contains data pids not in new resource map. Provenance information will be removed. 
\n + You can get old provenance statements back using: + old_prov <- recover_prov(mn, rm_pid) + rm_new <- update_resource_map(mn, rm_pid, metadata_pid, data_pids, other_statements = old_prov, keep_prov = T)") + + new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, data_pids = data_pids, child_pids = child_pids, - other_statements = statements, resource_map_pid = identifier) + } + else if (all(prov_pids %in% data_pids) == TRUE) { + new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, + data_pids = data_pids, + child_pids = child_pids, + other_statements = statements, + resource_map_pid = identifier) + } + } + else if (keep_prov == TRUE) { + if (any(prov_pids %in% data_pids == FALSE)){ + warning("Old provenance contains data pids not in new resource map. Provenance information is retained since keep_prov is set to TRUE") + } + new_rm_path <- generate_resource_map(metadata_pid = metadata_pid, + data_pids = data_pids, + child_pids = child_pids, + other_statements = statements, + resource_map_pid = identifier) + } stopifnot(file.exists(new_rm_path)) rm(sysmeta) - log_message(paste0("Getting updated copy of System Metadata for ", resource_map_pid)) sysmeta <- dataone::getSystemMetadata(mn, resource_map_pid) - stopifnot(class(sysmeta) == "SystemMetadata") + stopifnot(is(sysmeta, "SystemMetadata")) new_rm_sysmeta <- sysmeta new_rm_sysmeta@identifier <- identifier @@ -624,16 +825,18 @@ update_resource_map <- function(mn, new_rm_sysmeta@obsoletes <- resource_map_pid slot(new_rm_sysmeta, "obsoletedBy", check = FALSE) <- NA + # Set the replication policy back to default + new_rm_sysmeta <- clear_replication_policy(new_rm_sysmeta) + new_rm_sysmeta <- add_admin_group_access(new_rm_sysmeta) if (public) { new_rm_sysmeta <- datapack::addAccessRule(new_rm_sysmeta, "public", "read") } else { - new_rm_sysmeta <- remove_public_access(new_rm_sysmeta) + new_rm_sysmeta <- datapack::removeAccessRule(new_rm_sysmeta, "public", "read") } # Update it - 
log_message(paste0("Updating resource map...")) resmap_update_response <- dataone::updateObject(mn, pid = resource_map_pid, newpid = identifier, @@ -648,22 +851,32 @@ update_resource_map <- function(mn, file.remove(new_rm_path) } - log_message(paste0("Successfully updated ", resource_map_pid, " with ", identifier, ".")) + message(paste0("Successfully updated ", resource_map_pid, " with ", identifier, ".")) return(resmap_update_response) } -#' Set the file name on an object +#' Set the file name for an object +#' +#' Set the file name for an object. #' #' @param mn (MNode) The Member Node. #' @param pid (character) The PID of the object to set the file name on. #' @param name (character) The file name. #' -#' @return (logical) Whether the update succeeded, FALSE means there was an error. +#' @return (logical) Whether the update succeeded. +#' #' @export #' #' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn, "urn:node:mnTestKNB") +#' +#' pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +#' set_file_name(mn, pid, "myfile.csv") +#' } set_file_name <- function(mn, pid, name) { stopifnot(is(mn, "MNode")) stopifnot(is.character(pid), @@ -672,6 +885,70 @@ set_file_name <- function(mn, pid, name) { nchar(name) > 0) sysmeta <- dataone::getSystemMetadata(mn, pid) + + if (!is.na(sysmeta@fileName)) { + if (sysmeta@fileName == name) + stop(paste0("fileName for object ", pid, "is already set to: ", name)) + } + sysmeta@fileName <- name dataone::updateSystemMetadata(mn, pid, sysmeta) } + + + +#' Helper for publish_object. Reformat the fileName in system metadata. +#' +#' Reformat the fileName field in an object's system metadata to follow Arctic Data Center +#' system metadata naming conventions. Publish_object calls this function to rename +#' the fileName field in system metadata. 
+#' +#' @param path (character) full file path +#' @param sysmeta (S4) A system metadata object +#' +reformat_file_name <- function(path, sysmeta) { + base_name <- basename(path) + if (sysmeta@formatId == 'http://www.openarchives.org/ore/terms') { + ext <- '.rdf.xml' + } else if (grepl('ecoinformatics\\.org/eml*', sysmeta@formatId)) { + ext <- '.xml' + # remove extension then truncate to 50 characters + base_name <- tools::file_path_sans_ext(base_name) %>% + stringr::str_sub(1, 50) + # re-trim if we're in the middle of a word and add extension back on + index <- stringi::stri_locate_last_fixed(base_name, ' ')[1] + # Set index to the end of the string if there are no spaces. Add + 1 because str_sub subtracts one to remove the white space. + if (is.na(index)) index <- nchar(base_name) + 1 + base_name <- stringr::str_sub(base_name, 1, index -1) %>% + paste0(ext) + } else { + ext <- paste0('.', tools::file_ext(base_name)) + } + + file_name <- stringr::str_replace_all(base_name, '[^[:alnum:]]', '_') %>% + stringr::str_replace_all('_[_]*', '_') %>% # replaces consecutive underscores with one + stringr::str_sub(end = -(nchar(ext) + 1)) %>% + paste0(ext) + + return(file_name) +} + +#' Get a data.frame of prov statements from a resource map pid. +#' +#' This is a function that is useful if you need to recover lost prov statements. It returns +#' a data.frame of statements that can be passed to `update_resource_map` in the `other_statements` +#' argument. 
+#' +#' @param mn (MNode) A member node instance +#' @param rm_pid (character) A resource map identifier +#' @return a data.frame of prov statements +#' @export +recover_prov <- function(mn, rm_pid){ + old_resource_map_path <- tempfile() + writeLines(rawToChar(dataone::getObject(mn, rm_pid)), old_resource_map_path) + statements <- parse_resource_map(old_resource_map_path) + statements <- filter_packaging_statements(statements) + unlink(old_resource_map_path) + return(statements) +} + diff --git a/R/eml.R b/R/eml.R index ee540c1..2e746e0 100644 --- a/R/eml.R +++ b/R/eml.R @@ -1,243 +1,158 @@ -#' eml.R -#' -#' Helpers for creating EML. - +# Helper functions for creating EML metadata -#' Create EML otherEntity objects for a set of PIDs -#' -#' Note this is a wrapper around sysmeta_to_other_entity which handles the task of -#' creating the EML otherEntity. +#' Create EML entity with physical section from a DataONE PID #' #' @param mn (MNode) Member Node where the PID is associated with an object. -#' @param pids (character) The PID of the object to create the sub-tree for. +#' @param pid (character) The PID of the object to create the sub-tree for. +#' @param entity_type (character) What kind of object to create from the input. One of "dataTable", +#' "spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity". +#' @param ... (optional) Additional arguments to be passed to \code{eml$entityType()}. +#' +#' @return (list) The entity object. 
#' -#' @return (list of otherEntity) The otherEntity object(s) #' @export #' #' @examples #' \dontrun{ -#' # Generate EML otherEntity objects for all the data in a package -#' pkg <- get_package(mn, pid) -#' pid_to_other_entity(mn, pkg$data) +#' # Generate EML otherEntity +#' pid_to_eml_entity(mn, +#' pid, +#' entity_type = "otherEntity", +#' entityName = "Entity Name", +#' entityDescription = "Description about entity") #' } -pid_to_eml_other_entity <- function(mn, pids) { - stopifnot(class(mn) == "MNode") - stopifnot(is.character(pids), - all(nchar(pids)) > 0) +pid_to_eml_entity <- function(mn, + pid, + entity_type = "otherEntity", + ...) { + + stopifnot(is(mn, "MNode")) + stopifnot(is.character(pid), + nchar(pid) > 0, + length(pid) == 1) + + stopifnot(entity_type %in% c("dataTable", + "spatialRaster", + "spatialVector", + "storedProcedure", + "view", + "otherEntity")) + + systmeta <- getSystemMetadata(mn, pid) + + entity <- list(physical = pid_to_eml_physical(mn, pid), ...) + + # Set entity slots + if (length(entity$id) == 0) { + # entity$id <- list(xml_attribute = systmeta@identifier) + entity$id <- systmeta@identifier + } - sysmeta <- lapply(pids, function(pid) { getSystemMetadata(mn, pid) }) - sysmeta_to_eml_other_entity(sysmeta) -} + if (length(entity$scope) == 0) { + #entity$scope <- list(xml_attribute = "document") + entity$scope <- "document" + } -#' This function is deprecated. See \link{pid_to_other_eml_entity}. 
-#' -#' @param mn (MNode) -#' @param pids (character) -#' -#' @return -#' @export -sysmeta_to_other_entity <- function(mn, pids) { - .Deprecated("pid_to_other_eml_entity", - package = "arcticdtautils", - old = "pid_to_other_entity") + if (length(entity$entityName) == 0) { + + if (!is.na(systmeta@fileName)) { + entity$entityName <- systmeta@fileName + } + } + + if (entity_type == "otherEntity" && length(entity$entity_type) == 0) { + entity$entityType <- "Other" + } + else if (entity_type != "otherEntity"){ + entity$entityType <- NULL + } + + return(entity) } + #' Create EML physical objects for the given set of PIDs #' -#' Note this is a wrapper around sysmeta_to_eml_physical which handles the task of -#' creating the EML physical +#' This is a wrapper around [sysmeta_to_eml_physical()] which handles the task of +#' creating the EML physical. #' #' @param mn (MNode) Member Node where the PID is associated with an object. -#' @param pids (character) The PID of the object to create the sub-tree for. +#' @param pid (character) The PID of the object to create the physical for. #' -#' @return (list of otherEntity) The otherEntity object(s) -#' @export +#' @return (list) A physical object. 
#' -#' @examples -#' \dontrun{ -#' # Generate EML physical objects for all the data in a package -#' pkg <- get_package(mn, pid) -#' pid_to_eml_physical(mn, pkg$data) -#' } -pid_to_eml_physical <- function(mn, pids) { - stopifnot(class(mn) == "MNode") - stopifnot(is.character(pids), - all(nchar(pids)) > 0) - - sysmeta <- lapply(pids, function(pid) { getSystemMetadata(mn, pid) }) - sysmeta_to_eml_physical(sysmeta) -} - -#' Create an EML otherEntity for the given object from the System Metadata -#' -#' @param sysmeta (SystemMetadata) One or more System Metadata objects -#' -#' @return (list of otherEntity) The otherEntity object(s) #' @export #' #' @examples #' \dontrun{ -#' # Generate EML otherEntity objects for all the data in a package -#' pkg <- get_package(mn, pid) -#' sm <- lapply(pkg$data, function(pid) { getSystemMetadata(mn, pid) }) -#' sysmeta_to_other_entity(sm) -#'} -sysmeta_to_eml_other_entity <- function(sysmeta) { - work <- function(x) { - other_entity <- new("otherEntity") - other_entity@id <- new("xml_attribute", x@identifier) - other_entity@scope <- new("xml_attribute", "document") - - if (is.na(x@fileName)) { - other_entity@entityName <- new("entityName", "NA") - } - else { - other_entity@entityName <- new("entityName", x@fileName) - } - - other_entity@entityType <- "Other" - - phys <- sysmeta_to_eml_physical(x) - other_entity@physical <- new("ListOfphysical", phys) - - other_entity - } - +#' # Generate EML physical sections for an object in a data package +#' phys <- pid_to_eml_physical(mn, pid) +#' } +pid_to_eml_physical <- function(mn, pid) { + stopifnot(is(mn, "MNode")) + stopifnot(is.character(pid), + all(nchar(pid)) > 0, + length(pid) == 1) + names(pid) <- '' # Named inputs produce a named output list - which is invalid in EML - if (!is(sysmeta, "list")) sysmeta <- list(sysmeta) + sysmeta <- getSystemMetadata(mn, pid) - lapply(sysmeta, work) + sysmeta_to_eml_physical(sysmeta) } -#' This function is deprecated. 
See \link{sysmeta_to_eml_other_entity}. -#' -#' @param sysmeta (SystemMetadata) -#' -#' @return -#' @export -sysmeta_to_other_entity <- function(sysmeta) { - .Deprecated("sysmeta_to_eml_other_entity", - package = "arcticdtautils", - old = "sysmeta_to_other_entity") -} - -#' Create an EML physical object from System Metadata +#' Create an EML physical object from system metadata #' #' This function creates a pre-canned EML physical object from what's in the -#' System Metadata of an Object. Note that it sets an Online Distrubtion URL +#' System Metadata of an object. Note that it sets an Online Distribution URL #' of the DataONE v2 resolve service for the PID. #' -#' @param sysmeta (SystemMetadata) One or more System Metadata objects +#' @param sysmeta (SystemMetadata) One or more System Metadata objects. +#' +#' @return (list) A list of physical objects. #' -#' @return (list of physical) The physical objects for each sysmeta #' @export #' #' @examples -#' #' \dontrun { -#' # Generate EML physical objects for all the data in a package -#' pkg <- get_package(mn, pid) -#' sm <- lapply(pkg$data, function(pid) { getSystemMetadata(mn, pid) }) +#' \dontrun{ +#' # Generate EML physical object from a system metadata object +#' sm <- getSystemMetadata(mn, pid) #' sysmeta_to_eml_physical(sm) #' } sysmeta_to_eml_physical <- function(sysmeta) { - work <- function(x) { - phys <- new("physical") - phys@scope <- new("xml_attribute", "document") + stopifnot(is(sysmeta, "SystemMetadata")) - if (is.na(x@fileName)) { - phys@objectName <- new("objectName", "NA") + if (is.na(sysmeta@fileName)) { + ob_name <- "NA" } else { - phys@objectName <- new("objectName", x@fileName) + ob_name <- sysmeta@fileName } - phys@size <- new("size", format(x@size, scientific = FALSE)) - phys@size@unit <- new("xml_attribute", "bytes") - - phys@authentication <- new("ListOfauthentication", list(new("authentication", x@checksum))) - phys@authentication[[1]]@method <- new("xml_attribute", x@checksumAlgorithm) 
- phys@dataFormat <- new("dataFormat") - phys@dataFormat@externallyDefinedFormat <- new("externallyDefinedFormat") - phys@dataFormat@externallyDefinedFormat@formatName <- x@formatId + phys <- set_physical(objectName = ob_name, + size = format(sysmeta@size, scientific = FALSE), + sizeUnit = "bytes", + authentication = sysmeta@checksum, + authMethod = sysmeta@checksumAlgorithm, + url = paste0("https://cn.dataone.org/cn/v2/resolve/", sysmeta@identifier)) - phys@distribution <- new("ListOfdistribution", list(new("distribution"))) - phys@distribution[[1]]@scope <- new("xml_attribute", "document") - phys@distribution[[1]]@online <- new("online") - phys@distribution[[1]]@online@url <- new("url", paste0("https://cn.dataone.org/cn/v2/resolve/", x@identifier)) - - slot(phys@distribution[[1]]@online@url, "function") <- new("xml_attribute", "download") + phys$dataFormat <- list(externallyDefinedFormat = list(formatName = sysmeta@formatId)) phys - } - - if (!is(sysmeta, "list")) sysmeta <- list(sysmeta) - - lapply(sysmeta, work) } -#' Creates and sets EML otherEntity elements to an existing EML document, -#' replacing any existing otherEntities -#' -#' This function is slow because it needs get the System Metadata for each -#' element of `pids` in order to get the fileName, checksum, etc. -#' -#' @param mn (MNode) The Member Node the objects exist on. -#' @param path (character) The location on disk of the EML file. -#' @param pids (character) One or more PIDs for the objects. -#' -#' @return (character) The path to the updated EML file. -#' @export -#' -#' @examples -#' \dontrun{ -#' mn <- MNode(...) 
# Set up a connection to an MN -#' eml_path <- "/path/to/your/eml.xml" -#' set_other_entities(mn, eml_path, "a_data_pid") -#' } -set_other_entities <- function(mn, path, pids) { - stopifnot(class(mn) == "MNode") - stopifnot(file.exists(path)) - stopifnot(all(is.character(pids)), - all(nchar(pids) > 0)) - - if (length(pids) == 0) { - message("Skipped adding EML otherEntity elements because no pids were specified.") - return(path) - } - - # Get the metadata document from the MN and load it as an EML document - doc <- EML::read_eml(path) - stopifnot(class(doc) == "eml") - - message("Setting EML otherEntity elements. This can take a while if there are lots of PIDs...") - - # Generate otherEntity elements - other_entities <- pid_to_other_entity(mn, pids) - - # Concatenate the existing and new otherEntity elements and put back in the - # EML - if (length(other_entities) > 0) { - doc@dataset@otherEntity <- new("ListOfotherEntity", other_entities) - } - - # Write the modified document back to disk and stop - EML::write_eml(doc, path) - stopifnot(EML::eml_validate(path) == TRUE) - - path -} #' Get the Metacat docid for the given identifier #' +#' Get the Metacat docid for the given identifier. +#' #' @param sysmeta (SystemMetadata) The sysmeta of the object you want to find. #' -#' @return (character) The docid -#' @export +#' @return (character) The docid. #' -#' @examples +#' @noRd get_doc_id <- function(sysmeta) { - stopifnot(class(sysmeta) == "SystemMetadata") + stopifnot(is(sysmeta, "SystemMetadata")) message("Looking up docid for ", sysmeta@identifier, ".") @@ -272,180 +187,163 @@ get_doc_id <- function(sysmeta) { doc_id } -#' Adds a step to the methods document -#' -#' @param doc (eml) The EML document to add the method step to. -#' @param title (character) The title of the method step. -#' @param description (character) The description of the method. 
-#' -#' @return (eml) The modified EML document -#' @export -#' -#' @examples -add_methods_step <- function(doc, title, description) { - stopifnot(is(doc, "eml")) - stopifnot(is(doc@dataset, "dataset")) - stopifnot(is.character(title), - nchar(title) > 0) - stopifnot(is.character(description), - nchar(description) > 0) - - new_step <- new("methodStep", - description = new("description", - section = new("section", list(newXMLNode("title", title), - newXMLNode("para", description))))) - doc@dataset@methods@methodStep[[length(doc@dataset@methods@methodStep) + 1]] <- new_step - - doc -} - -#' Clear all methods from the document. -#' -#' @param doc (eml) The document to clear methods from. +#' Create an EML party #' -#' @return (eml) The modified document. -#' @export +#' You will usually want to use the high-level functions such as +#' [eml_creator()] and [eml_contact()] but using this is fine. #' -#' @examples -clear_methods <- function(doc) { - stopifnot(is(doc, "eml")) - - # Clear the methods out - doc@dataset@methods <- new("MethodsType") - - doc -} - -#' Low-level helper for creating EML parties +#' The `userId` argument assumes an ORCID so be sure to adjust for that. #' -#' You usually will want to use the high-level functions such as -#'\code{\link{eml_creator}} and \code{\link{eml_contact}} but using this is -#' fine. +#' @param type (character) The type of party (e.g. 'contact'). +#' @param given_names (character) The party's given name(s). +#' @param sur_name (character) The party's surname. +#' @param organization (character) The party's organization name. +#' @param position (character) The party's position. +#' @param email (character) The party's email address(es). +#' @param phone (character) The party's phone number(s). +#' @param address (character) The party's address(es) as a valid EML address +#' @param userId (character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ. +#' @param role (character) The party's role. 
#' -#' @param type (character) The type of party (e.g. 'contact') -#' @param given_names (character) The party's given name(s) -#' @param sur_name (character) The party's surname -#' @param organization (character) The party's organization name -#' @param position (character) The party's position -#' @param email (character) The party's email address(es) -#' @param phone (character) The party's phone number(s) -#' @param address (character) The party's address(es) -#' @param userId (character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ -#' @param role (character) The party's role +#' @return (party) An instance of the party specified by the `type` argument. #' -#' @return An instance of the party specified by the in \code{type} argument #' @export #' #' @examples -#' eml_party("creator", "Test", "User) +#' \dontrun{ +#' eml_party("creator", "Test", "User") +#' eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +#' eml_party("creator", given_names = list("Dominic", "'Dom'"), +#' sur_name = "Mullen", list("NCEAS", "UCSB"), +#' position = list("Data Scientist", "Programmer"), +#' address = eml$address(deliveryPoint = "735 State St", +#' city = "Santa Barbara", +#' administrativeArea = "CA", +#' postalCode = "85719")) +#'} eml_party <- function(type="associatedParty", - given_names=NULL, - sur_name=NULL, - organization=NULL, - position=NULL, - email=NULL, - phone=NULL, - address=NULL, + given_names = NULL, + sur_name = NULL, + organization = NULL, + position = NULL, + email = NULL, + phone = NULL, + address = NULL, userId = NULL, - role=NULL) { + role = NULL) { if (all(sapply(c(sur_name, organization, position), is.null))) { stop(call. 
= FALSE, "You must specify at least one of sur_name, organization, or position to make a valid creator") } + if (!is.null(address) & + !"deliveryPoint" %in% names(address) & + !"administrativeArea" %in% names(address) & + !"postalCode" %in% names(address) & + !"city" %in% names(address)) { + stop(call. = FALSE, + "An address was given but no deliveryPoint, administrativeArea, city, or postalCode child elements were specified.") + } - party <- new(type) + party <- list() # Individual Name if (!is.null(sur_name)) { - party@individualName <- c(eml_individual_name(given_names, sur_name)) + party$individualName <- list(givenName = given_names, surName = sur_name) } # Organization Name if (!is.null(organization)) { - party@organizationName <- c(new("organizationName", .Data = organization)) + party$organizationName <- organization } # Position if (!is.null(position)) { - party@positionName <- c(new("positionName", .Data = position)) + party$positionName <- position } # Email if (!is.null(email)) { - party@electronicMailAddress <- new("ListOfelectronicMailAddress", lapply(email, function(x) { new("electronicMailAddress", .Data = x )})) + party$electronicMailAddress <- email } # Address if (!is.null(address)) { - # Upgade to a ListOfaddress if needed - if (is(address, "address")) { - address <- c(address) - } - - party@address <- address + party$address <- address } # Phone if (!is.null(phone)) { - # Upgrade to phone is needed - if (is.character(phone)) { - phone <- new("ListOfphone", lapply(phone, function(x) as(x, "phone"))) - } - - # Upgade to a ListOfphone if needed - if (is(phone, "phone")) { - phone <- c(phone) - } - - party@phone <- phone + party$phone <- phone } # userId if (!is.null(userId)) { - party@userId <- c(new("userId", .Data = userId, directory="https://orcid.org")) - #need to put warning statement here + # Warn if the userId doesn't look like an ORCID + if (!grepl("^https:\\/\\/orcid\\.org", userId)) { + warning(paste0("The provided `userId` of '", 
userId, "' does not look like an ORCID and the `userId` argument assumes the given `userId` is an ORCID. ORCIDs should be passed in like https://orcid.org/WWWW-XXXX-YYYY-ZZZZ.")) + } + + party$userId$userId <- userId + party$userId$directory = "https://orcid.org" } # Role if (!is.null(role)) { - if (type != "associatedParty") { + # Only allow roles to be set if type is associatedParty or personnel + if (type != "associatedParty" && type != "personnel") { stop(call. = FALSE, - paste0("Setting a role is only valid on an associatedParty, not a ", type, ".")) + paste0("Setting a role is only valid on an associatedParty or personnel, not a ", type, ".")) } - party@role <- new("role", .Data = role) + party$role <- role } + + party } + #' Create an EML creator #' -#' See \code{\link{eml_party}} for details. +#' See [eml_party()] for details. +#' +#' @param ... Arguments passed on to [eml_party()]. #' -#' @param ... Arguments passed on to eml_party +#' @return (creator) The new creator. #' -#' @return (creator) The new creator #' @export #' #' @examples -#' eml_creator("test", "user", email = "test@user.com") +#' \dontrun{ +#' eml_creator("test", "user", email = "test@@user.com") +#' eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +#' eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), +#' c("Data Scientist", "Programmer")) +#'} eml_creator <- function(...) { eml_party("creator", ...) } + #' Create an EML contact #' -#' See \code{\link{eml_party}} for details. +#' See [eml_party()] for details. +#' +#' @param ... Arguments passed on to [eml_party()]. #' -#' @param ... Arguments passed on to eml_party +#' @return (contact) The new contact. 
#' -#' @return (contact) The new contact #' @export #' #' @examples -#' eml_contact("test", "user", email = "test@user.com") +#' \dontrun{ +#' eml_contact("test", "user", email = "test@@user.com") +#' eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +#' eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), +#' c("Data Scientist", "Programmer")) +#'} eml_contact <- function(...) { eml_party("contact", ...) } @@ -453,232 +351,270 @@ eml_contact <- function(...) { #' Create an EML metadataProvider #' -#' See \code{\link{eml_party}} for details. +#' See [eml_party()] for details. +#' +#' @param ... Arguments passed on to [eml_party()]. #' -#' @param ... Arguments passed on to eml_party +#' @return (metadataProvider) The new metadataProvider. #' -#' @return (metadataProvider) The new metadataProvider #' @export #' #' @examples -#' eml_metadata_provider("test", "user", email = "test@user.com") +#' eml_metadata_provider("test", "user", email = "test@@user.com") eml_metadata_provider <- function(...) { eml_party("metadataProvider", ...) } + #' Create an EML associatedParty #' -#' See \code{\link{eml_party}} for details. +#' See [eml_party()] for details. +#' +#' @param ... Arguments passed on to [eml_party()]. #' -#' @param ... Arguments passed on to eml_party +#' @return (associatedParty) The new associatedParty. #' -#' @return (associatedParty) The new associatedParty #' @export #' #' @examples -#' eml_associated_party("test", "user", email = "test@user.com", role = "Principal Investigator") +#' eml_associated_party("test", "user", email = "test@@user.com", role = "Principal Investigator") eml_associated_party <- function(...) { eml_party("associatedParty", ...) } -#' Create an EML individualName section + + +#' Create an EML personnel #' -#' @param given_names (character) One or more given names. -#' @param sur_name (character) A sur (last) name. +#' See [eml_party()] for details. +#' +#' @param ... 
Arguments passed on to [eml_party()]. +#' @param role (character) Personnel role, e.g. "principalInvestigator". +#' +#' @return (personnel) The new personnel. #' -#' @return (individualName) The new individualName section #' @export #' #' @examples -#' eml_individual_name("some", "user) -eml_individual_name <- function(given_names=NULL, sur_name) { - stopifnot(is.character(sur_name) && nchar(sur_name) > 0) - - # Create - indiv_name <- new("individualName") - - if (!is.null(given_names)) { - stopifnot(all(sapply(given_names, is.character))) - stopifnot(all(lengths(given_names) > 0)) - - givens <- lapply(given_names, function(given_name) { - x <- new("givenName") - x@.Data <- given_name - x - }) - - indiv_name@givenName <- new("ListOfgivenName", givens) +#' eml_personnel("test", "user", email = "test@@user.com", role = "principalInvestigator") +eml_personnel <- function(role = NULL, ...) { + if (is.null(role)) { + stop(call. = FALSE, + "You must specify a role for a personnel.") } - indiv_name@surName <- new("surName", .Data = sur_name) - - indiv_name + eml_party("personnel", role = role, ...) } - -#' Create an eml-project section. +#' Create an EML project section +#' +#' Create an EML project section. #' -#' Note: This is super-limited right now. +#' Note - studyAreaDescription, designDescription, and relatedProject are not +#' fully fleshed out. Need to pass these objects in directly if you want to use +#' them. #' -#' @param title (character) Title of the project. -#' @param awards (character) One or more awards for the project. -#' @param first (character) First name of the person with role `role`. -#' @param last (character) Last name of the person with role `role`. -#' @param organizations (character) Optional. One or more organization strings. -#' @param role (character) Optional. Specify an alternate role. +#' @param title (character) Title of the project (Required). May have multiple titles +#' constructed using `list`. 
+#' @param personnelList (list of personnel) Personnel involved with the project. +#' @param abstract (character) Project abstract. Can pass as a list +#' for separate paragraphs. +#' @param funding (character) Funding sources for the project such as grant and +#' contract numbers. Can pass as a list for separate paragraphs. +#' @param studyAreaDescription (studyAreaDescription) +#' @param designDescription (designDescription) +#' @param relatedProject (project) #' #' @return (project) The new project section. +#' #' @export #' #' @examples -#' eml_project("Some title", "51231", "Some", "User") -eml_project <- function(title, awards, first, last, organizations = NULL, role = "originator") { - stopifnot(all(sapply(c(title, awards, first, last), is.character)), - all(lengths(c(title, awards, first, last)) > 0)) +#' proj <- eml_project(list("Some title", "A second title if needed"), +#' list(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), +#' list("Abstract paragraph 1", "Abstract paragraph 2"), +#' "Funding Agency: Award Number 12345") +eml_project <- function(title, + personnelList, + abstract = NULL, + funding = NULL, + studyAreaDescription = NULL, + designDescription = NULL, + relatedProject = NULL) { + + + if (is.null(eml_get_simple(personnelList, "role"))) { + stop(call. 
= FALSE, + "Each person in the personnelList must have a role.") + } - # project - project <- new("project") + project <- list() - # title - title_ele <- new("title") - title_ele@.Data <- title - project@title <- new("ListOftitle", list(title_ele)) + # Title + project$title <- title - # personnel - personnel <- new("personnel") + project$personnel <- personnelList - # individualName - personnel@individualName <- new("ListOfindividualName", list(eml_individual_name(first, last))) + # Abstract + if (!is.null(abstract)) { + project$abstract <- list(para = abstract) + } - # organizationName - if (!is.null(organizations)) { - organizations <- lapply(organizations, function(org) { o <- new("organizationName"); o@.Data <- org; o } ) - personnel@organizationName <- new("ListOforganizationName", organizations) + # Funding + if (!is.null(funding)) { + project$funding <- list(para = funding) } - # role - personnel@role <- new("ListOfrole", list(new("role", role))) + # Study area description + if (!is.null(studyAreaDescription)) { + project$studyAreaDescription <- studyAreaDescription + } - project@personnel <- new("ListOfpersonnel", list(personnel)) + # Design description + if (!is.null(designDescription)) { + project$designDescription <- designDescription + } - # funding - funding_paras <- lapply(awards, function(awd) { - a <- new("para"); - a@.Data <- list(awd); - a@.Data <- list(xml2::xml_new_root("para", as.character(awd))) - a - }) - project@funding@para <- new("ListOfpara", funding_paras) + # Related Project + if (!is.null(relatedProject)) { + project$relatedProject <- relatedProject + } project } +#' Create an EML geographicCoverage section +#' +#' A simple way to create an EML geographicCoverage section. +#' +#' For a bounding box, all coordinates should be unique. +#' For a single point, the North and South bounding coordinates should be the same and +#' the East and West bounding coordinates should be the same. 
+#' +#' Note that EML::set_coverage() provides the same (and more) functionality +#' +#' @param description (character) A textual description. +#' @param north (numeric) North bounding coordinate. +#' @param east (numeric) East bounding coordinate. +#' @param south (numeric) South bounding coordinate. +#' @param west (numeric) West bounding coordinate. +#' +#' @return (geographicCoverage) The new geographicCoverage section. +#' eml_geographic_coverage <- function(description, north, east, south, west) { - cov <- new("geographicCoverage") + cov <- list() - cov@geographicDescription <- description + cov$geographicDescription <- description - cov@boundingCoordinates@northBoundingCoordinate <- new("northBoundingCoordinate", as.character(north)) - cov@boundingCoordinates@eastBoundingCoordinate <- new("eastBoundingCoordinate", as.character(east)) - cov@boundingCoordinates@southBoundingCoordinate <- new("southBoundingCoordinate", as.character(south)) - cov@boundingCoordinates@westBoundingCoordinate <- new("westBoundingCoordinate", as.character(west)) + cov$boundingCoordinates$northBoundingCoordinate <- as.character(north) + cov$boundingCoordinates$eastBoundingCoordinate <- as.character(east) + cov$boundingCoordinates$southBoundingCoordinate <- as.character(south) + cov$boundingCoordinates$westBoundingCoordinate <- as.character(west) cov } -#' Create an EML address element. +#' Create an EML address element +#' +#' A simple way to create an EML address element. +#' +#' Note that EML::eml$address() provides the same functionality #' #' @param delivery_points (character) One or more delivery points. -#' @param city (character) City -#' @param administrative_area (character) Administrative area -#' @param postal_code (character) Postal code +#' @param city (character) City. +#' @param administrative_area (character) Administrative area. +#' @param postal_code (character) Postal code. #' #' @return (address) An EML address object. 
-#' @export #' -#' @examples eml_address <- function(delivery_points, city, administrative_area, postal_code) { stopifnot(is.character(delivery_points), is.character(city), is.character(administrative_area), (is.character(postal_code) || is.numeric(postal_code))) - address <- new("address") - - # Delivery point(s) - dps <- lapply(delivery_points, function(dp) { - x <- new("deliveryPoint") - x@.Data <- dp - x - }) + address <- list() - # City - ct <- new("city") - ct@.Data <- city + address$deliveryPoint <- delivery_points + address$city <- city + address$administrativeArea <- administrative_area + address$postalCode <- as.character(postal_code) - # Administrative area - aa <- new("administrativeArea") - aa@.Data <- administrative_area - - # Postal Code - pc <- new("postalCode") - pc@.Data <- as.character(postal_code) - - # Put them all together - address@deliveryPoint <- new("ListOfdeliveryPoint", dps) - address@city <- ct - address@administrativeArea <- aa - address@postalCode <- pc address } - -#' Set the abstract on an EML document +#' Set the abstract for an EML document #' -#' @param doc (eml) An EML document +#' Set the abstract for an EML document. +#' +#' @param doc (eml) An EML document. #' @param text (character) The abstract text. If \code{text} is length one, an -#' abstract without \code{} or \code{section} elements will be created. -#' If \code{text} is greater than one in length, \code{para} elementes will be -#' used for each element. +#' abstract without \code{} or \code{
} elements will be created. +#' If \code{text} is greater than one in length, \code{para} elementes will be +#' used for each element. #' -#' @return (eml) The modified EML document -#' @export +#' @return (eml) The modified EML document. #' -#' @examples -#' set_abstract(doc, c("Test abstract...")) -#' set_abstract(doc, c("First para", "second para")) set_abstract <- function(doc, text) { - stopifnot(is(doc, "eml")) - stopifnot(is.character(text), - length(text) > 0) + # need to rewrite this test + # stopifnot(is(doc, "eml")) if (length(text) == 1) { - doc@dataset@abstract <- new("abstract", .Data = new("TextType", .Data = "hi")) + doc$dataset$abstract <- list(abstract = text) } else if (length(text) > 1) { - doc@dataset@abstract <- new("abstract", para = new("ListOfpara", lapply(text, function(x) new("para", x)))) + doc$dataset$abstract <- list(abstract = text) } doc } +#' Create an EML abstract +#' +#' Create an EML abstract. +#' +#' Note that eml$abstract() provides the same functionality. +#' +#' @param text (character) Paragraphs of text with one paragraph per element in the +#' character vector, constructed using `list` +#' +#' @return (abstract) An EML abstract. +#' +#' +#' @examples +#' \dontrun{ +#' # Set an abstract with a single paragraph +#' eml_abstract("Test abstract...") +#' +#' # Or one with multiple paragraphs +#' eml_abstract(list("First para...", "second para...")) +#' } +eml_abstract <- function(text) { + stopifnot(is.character(text), + length(text) > 0, + all(nchar(text)) > 0) + + abstract <- list(abstract = list(para = text)) + + abstract +} + + #' Validate an EML attributeList attribute-by-attribute #' #' The attributes passed into this function are validated one-by-one and the #' progress of going through each attribute is printed to the screen along -#' with any and all validation issues. +#' with any and all validation issues. 
This is done by, for each attribute in the list, +#' creating a minimum valid EML document and adding a new otherEntity with a new +#' attributeList containing the single attribute to be validated. #' -#' This is done by, for each attribute in the list, creating a minimum valid -#' EML document and adding a new otherEntity with a new attributeList containing -#' the single attribute to be validated. +#' @param attributes (attributeList) An attributeList. #' -#' @param attributes (attributeList) An attributeList +#' @return (logical) Named vector indicating which attributes are valid. #' -#' @return (boolean) Named vector of TRUE/FALSE indicating which attributes -#' are valid #' @export #' #' @examples @@ -689,13 +625,13 @@ set_abstract <- function(doc, text) { #' eml_validate_attributes(attributes) #' } eml_validate_attributes <- function(attributes) { - stopifnot(is(attributes, "attributeList")) + # Define an interal applyable function to validate each attribute eml_validate_attribute <- function(attribute) { - stopifnot(is(attribute, "attribute")) + stopifnot(!is.null(names(attribute))) - doc@dataset@otherEntity[[1]]@attributeList@attribute[[1]] <- attribute + doc$dataset$otherEntity$attributeList$attribute[[1]] <- attribute # Validate! 
eml_validate(doc) @@ -704,20 +640,20 @@ eml_validate_attributes <- function(attributes) { # Create a minimum valid EML doc we'll re-use each time we validate a single # attribute - doc <- new("eml", packageId = "test", system = " test") - doc@dataset@title <- c(new("title", .Data = "test")) - doc@dataset@creator <- new("ListOfcreator", list(eml_creator("Test", "test"))) - doc@dataset@contact <- new("ListOfcontact", list(eml_contact("Test", "test"))) + # Create a dummy otherEntity with our attributeList - entity <- new("otherEntity", - entityName = "name", - entityType = "type") - entity@attributeList <- new("attributeList") - doc@dataset@otherEntity <- new("ListOfotherEntity", list(entity)) - results <- sapply(attributes@attribute, function(attribute) { - cat(paste0("Validating single attribute '", attribute@attributeName@.Data, "': ")) + doc <- list(packageId = "test", + system = "test", + dataset = list( + title = "test", + creator = list(individualName = list(givenName = "test", surName = "test")), + contact = list(individualName = list(givenName = "test", surName = "test")), + otherEntity = list(entityName = "name", entityType = "otherEntity"))) + + results <- sapply(attributes$attribute, function(attribute) { + cat(paste0("Validating single attribute '", attribute$attributeName, "': ")) result <- NULL result <- tryCatch({ @@ -736,141 +672,462 @@ eml_validate_attributes <- function(attributes) { } }) - names(results) <- sapply(attributes@attribute, function(x) x@attributeName) + names(results) <- sapply(attributes$attribute, function(x) x$attributeName) results } +#' Convert otherEntities to dataTables +#' +#' Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +#' otherEntity object as currently constructed - it does not add a physical or add attributes. +#' However, if these are already in their respective slots, they will be retained. +#' +#' @param doc (list) An EML document. 
+#' @param index (integer) The indicies of the otherEntities to be transformed. +#' @param validate_eml (logical) Optional. Whether or not to validate the EML after +#' completion. Setting this to `FALSE` reduces execution time by ~50 percent. +#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @importFrom magrittr '%>%' +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) +#' +#' doc <- eml_otherEntity_to_dataTable(doc, 1) +#' } +eml_otherEntity_to_dataTable <- function(doc, index, validate_eml = TRUE) { + stopifnot(methods::is(doc, "emld")) + stopifnot(is.logical(eml_validate(doc))) + stopifnot(is.numeric(index)) + stopifnot(length(eml_get_simple(doc$dataset$otherEntity, "entityName")) >= index) + if (any(duplicated(eml_get_simple(doc$dataset, "entityName"))) == T){ + stop(call. = FALSE, + "entityNames must be unique") + } + + ## set OE entityTypes to NULL and select the ones we want to use + + if (length(eml_get_simple(doc$dataset$otherEntity, "entityName")) == 1) { + ## prepare OE to copy + otherEntity <- doc$dataset$otherEntity + ## Handle case where otherEntity is in a list of length 1 (boxed) + if (is.null(names(otherEntity))) { + otherEntity <- otherEntity[[1]] + } + otherEntity$entityType <- NULL + ## delete otherEntity from list + doc$dataset$otherEntity <- NULL + } else { + otherEntity <- doc$dataset$otherEntity[index] + + for (i in 1:length(index)){ + otherEntity[[i]]$entityType <- NULL + } + ## delete otherEntity from list + doc$dataset$otherEntity <- doc$dataset$otherEntity[-index] + } + + + dts <- doc$dataset$dataTable + + ## handle various datatable length cases + if (length(dts) == 0){ + doc$dataset$dataTable <- otherEntity + } else{ + if (length(eml_get_simple(dts, "entityName")) == 1){ + dts <- list(dts) + doc$dataset$dataTable <- c(dts, otherEntity) + } + + else { + doc$dataset$dataTable <- c(dts, otherEntity) + } + } + + ## return eml + if (validate_eml == TRUE) 
{ + valid_eml <- eml_validate(doc) + if (!valid_eml) { + stop(attributes(valid_eml)) + } + } + + return(doc) +} + -#' Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table. +#' Search through EMLs #' -#' @param doc (eml) An EML document -#' @param entities (data.frame) A data.frame with columns type, path, pid, and -#' format_id -#' @param resolve_base (character) Optional. Specify a DataONE CN resolve base -#' URI which will be used for serializing download URLs into the EML. Most users -#' should not override the default value. +#' This function returns indices within an EML list that contain an instance where +#' `test == TRUE`. See examples for more information. +#' +#' @param doc (list) An EML object. +#' @param element (character) Element to evaluate. +#' @param test (function/character) A function to evaluate (see examples). If test is a character, +#' will evaluate if \code{element == test} (see example 1). +#' +#' @import EML #' -#' @return (eml) The modified EML document. #' @export #' +#' @author Mitchell Maier mitchell.maier@@gmail.com +#' #' @examples -#' # Create entities from files on disk #' \dontrun{ -#' types <- c("dataTable") -#' paths <- list.files(., full.names = TRUE) # Get full paths to some files -#' pids <- vapply(paths, function(x) { paste0("urn:uuid:", uuid::UUIDgenerate()) }, "") # Generate some UUID PIDs -#' format_ids <- guess_format_id(paths) # Try to guess format IDs, you should check this afterwards -#' -#' entity_df <- data.frame(type = types, -#' path = paths, -#' pid = pids, -#' format_id = format_ids, -#' stringsAsFactors = FALSE) -#' -#' doc <- new("eml") -#' doc <- eml_add_entities(doc, entity_df) -#'} +#' # Question: Which creators have a surName "Smith"? 
+#' n <- which_in_eml(eml$dataset$creator, "surName", "Smith") +#' # Answer: eml$dataset$creator[n] +#' +#' # Question: Which dataTables have an entityName that begins with "2016" +#' n <- which_in_eml(eml$dataset$dataTable, "entityName", function(x) {grepl("^2016", x)}) +#' # Answer: eml$dataset$dataTable[n] +#' +#' # Question: Which attributes in dataTable[[1]] have a numberType "natural"? +#' n <- which_in_eml(eml$dataset$dataTable[[1]]$attributeList$attribute, "numberType", "natural") +#' # Answer: eml$dataset$dataTable[[1]]$attributeList$attribute[n] +#' +#' #' # Question: Which dataTables have at least one attribute with a numberType "natural"? +#' n <- which_in_eml(eml$dataset$dataTable, "numberType", function(x) {"natural" %in% x}) +#' # Answer: eml$dataset$dataTable[n] +#' } +which_in_eml <- function(doc, element, test) { + + stopifnot(methods::is(doc, "list")) + stopifnot(is.character(element)) + + if (is.character(test)) { + value = test + test = function(x) {x == value} + + } else { + stopifnot(is.function(test)) + } + + elements_test <- eml_get(doc, element) + + if (is.null(elements_test)) { + location <- NULL + + } else { + result <- test(elements_test) + + if (length(isTRUE(result)) > 1) { + stop("Test must only return one value.") + + } else if (length(isTRUE(result)) == 1){ + location <- which(result == TRUE) + + } else { + location <- NULL + } + } + names(location) <- NULL + return(location) +} + + +#' Set a reference to an EML object +#' +#' This function creates a new object with the same class as \code{element_to_replace} +#' using a reference to \code{element_to_reference}. +#' +#' @param element_to_reference (list) An EML element to reference. +#' @param element_to_replace (list) An EML element to replace with a reference. 
+#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @export #' -#' # Read in a CSV containing the info about files on disk +#' @examples #' \dontrun{ -#' entity_df <- read.csv("./my_entities.csv", stringsAsFactors = FALSE) -#' doc <- new("eml") -#' doc <- eml_add_entities(doc, entity_df) +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +#' +#' # Set the first contact as a reference to the first creator +#' doc$dataset$contact[[1]] <- eml_set_reference(doc$dataset$creator[[1]], +#' doc$dataset$contact[[1]]) +#' +#' # This is also useful when we want to set references to a subset of 'dataTable' +#' or 'otherEntity' objects +#' # Add a few more objects first to illustrate the use: +#' doc$dataset$dataTable[[3]] <- doc$dataset$dataTable[[1]] +#' doc$dataset$dataTable[[4]] <- doc$dataset$dataTable[[1]] +#' # Add references to the second and third elements only (not the 4th): +#' for (i in 2:3) { +#' doc$dataset$dataTable[[i]]$attributeList <- eml_set_reference( +#' doc$dataset$dataTable[[1]]$attributeList, +#' doc$dataset$dataTable[[i]]$attributeList) #' } -eml_add_entities <- function(doc, entities, resolve_base="https://cn.dataone.org/cn/v2/resolve/") { - stopifnot(is(doc, "eml")) +#' # If we print the entire 'dataTable' list we see elements 2 and 3 have +#' references while 4 does not. +#' +#' doc$dataset$dataTable +#' } +eml_set_reference <- function(element_to_reference, element_to_replace) { + if (length(element_to_reference$id) == 0) { + stop('No id detected at element_to_reference$id. 
Please add an id in order to use references.') + } + id <- element_to_reference$id[1] + element_to_replace <- list(references = id) + return(element_to_replace) +} + - if (!is(entities, "data.frame")) { - stop("The argument 'entities' must be a 'data.frame'.") +#' Set shared attribute references +#' +#' This function sets shared attributes using the attributes of the first \code{type} +#' selected and creates references for all remaining objects of equivalent \code{type}. +#' +#' @param doc (emld) An EML object. +#' @param attributeList (attributeList) Optional. An EML attributeList object. If not provided +#' then it will default to the attributeList of the first \code{type} element. +#' @param type (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' +#' attributeList objects with references. Defaults to 'dataTable'. +#' +#' @return (doc) The modified EML document. +#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +#' atts <- EML::set_attributes( +#' EML::get_attributes(eml$dataset$dataTable[[1]]$attributeList)$attributes) +#' +#' eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable') +#' } +eml_set_shared_attributes <- function(doc, attributeList = NULL, type = 'dataTable') { + stopifnot(methods::is(doc, 'emld')) + stopifnot(type %in% c('dataTable', 'otherEntity')) + + x <- doc$dataset[[type]] + n <- length(x) + if (n <= 1) { + stop('1 or fewer entities') # add message } - if (!identical(sort(names(entities)), c("format_id", "path", "pid", "type"))) { - stop("The columns in the data.frame you passed in for the 'entities' argument did not have the expected column names of type, path, pid, format_id and it must.", call. 
= FALSE) + # If a new attributeList is provided set it + if (!is.null(attributeList)) { + x[[1]]$attributeList <- attributeList } + x[[1]]$attributeList$id <- stringi::stri_rand_strings(1, length = 10) # generate random identifier + # Apply references to all other elements + for (i in 2:n) { + x[[i]]$attributeList <- eml_set_reference(x[[1]]$attributeList, x[[i]]$attributeList) + } + + doc$dataset[[type]] <- x + return(doc) +} + +#' Get a simple list output from EML::eml_get() +#' +#' This function is a convenience wrapper around EML::eml_get() which +#' returns the output as a simple list as opposed to an object of type +#' `emld` by removing the attributes and context from the object. If an +#' element containing children is returned all of it's children will be +#' flattened into a named character vector. This function is best used +#' to extract values from elements that have no children. +#' +#' @param doc (list) An EML object or child/descendant object +#' @param element (character) Name of the element to be extracted. If +#' multiple occurrences are found, will extract all. +#' +#' @return out (vector) A list of values contained in element given +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' +#' doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +#' +#' datatable_names <- eml_get_simple(doc$dataset$dataTable, element = "entityName") +#'} +#' +eml_get_simple <- function(doc, element){ + out <- eml_get(doc, element, from = "list") + out$`@context` <- NULL + attributes(out) <- NULL + out <- unlist(out) + return(out) +} + +#' Reorder a named list of objects according to the order in the metadata +#' +#' This function takes a named list of data objects, such as what is +#' returned from `get_package`, and reorders them according to the order +#' they are given in the EML document. 
+#' +#' @param pid_list (list) A named list of data pids +#' @param doc (list) an `emld` document +#' +#' @return ordered_pids (list) A list of reordered pids +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +# +#' ids <- get_package(adc, 'resource_map_doi:10.18739/A2S17SS1M', file_names = TRUE) +#' doc <- EML::read_eml(dataone::getObject(adc, ids$metadata)) +#' +#' # return all entity types +#' ordered_pids <- reorder_pids(ids$data, doc) +#'} +#' +reorder_pids <- function(pid_list, doc){ + stopifnot(!is.null(names(pid_list))) - entity_types <- c("dataTable", "spatialRaster", "spatialVector", "storedProcedure", "view", "otherEntity") + entity_names <- eml_get_simple(doc, "entityName") - if (!all(entities$type %in% entity_types)) { - stop(call. = FALSE, paste0("The `type` column must only include values from: ", paste(entity_types, collapse = ", "), ".")) + if (is.null(entity_names)){ + stop("No entity names were found.") } - # Warn about existing entities - for(type in entity_types) { - if (type %in% entities$type && length(slot(doc@dataset, type)) > 0) { - warning(paste0("You are adding one or more ", type, " elements. 
This function only adds entities and does not remove/replace them.")) - } + if (length(entity_names) != length(pid_list)){ + stop("Number of entities in EML and resource map do not match") } - # Internal function to create a single entity - eml_entity <- function(type, path, pid, format_id) { - # Convert args to character vectors if needed - if (is.factor(type)) type <- as.character(type) - if (is.factor(path)) path <- as.character(path) - if (is.factor(pid)) pid <- as.character(pid) - if (is.factor(format_id)) format_id <- as.character(format_id) + ordered_pids <- pid_list[order(match(names(pid_list), entity_names))] + return(ordered_pids) +} - stopifnot(file.exists(path)) - stopifnot(is.character(path), nchar(path) > 0) - stopifnot(is.character(pid), nchar(pid) > 0) - stopifnot(is.character(format_id), nchar(format_id) > 0) +#' Create an EML project section from a list of NSF award numbers +#' +#' This function takes a list of NSF award numbers and uses it to +#' query the NSF API to get the award title, PIs, and coPIs. The +#' return value is an EML project section. 
The function supports 1 +#' or more award numbers +#' +#' @param awards (list) A list of NSF award numbers as characters +#' @param eml_version (char) EML version to use (2.1.1 or 2.2.0) +#' @return project (emld) An EML project section +#' +#' @export +#' +#' @examples +#' awards <- c("1203146", "1203473", "1603116") +#' +#' proj <- eml_nsf_to_project(awards, eml_version = "2.1.1") +#' +#' me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) +#' +#' doc <- list(packageId = "id", system = "system", +#' dataset = list(title = "A Mimimal Valid EML Dataset", +#' creator = me, +#' contact = me)) +#' +#' doc$dataset$project <- proj +#' +#' EML::eml_validate(doc) +#' +eml_nsf_to_project <- function(awards, eml_version = "2.1"){ + + stopifnot(is.character(awards)) + stopifnot(eml_version %in% c("2.1", "2.1.1", "2.2", "2.2.0")) - file_name <- basename(path) + award_nums <- awards - entity <- new(type) - entity@id <- new("xml_attribute", pid) - entity@scope <- new("xml_attribute", "document") + result <- lapply(award_nums, function(x){ + url <- paste0("https://api.nsf.gov/services/v1/awards.json?id=", x ,"&printFields=coPDPI,pdPIName,title") - entity@entityName <- new("entityName", .Data = file_name) + t <- jsonlite::fromJSON(url) - if (type == "otherEntity") { - entity@entityType <- "Other" + if ("serviceNotification" %in% names(t$response)) { + warning(paste(t$response$serviceNotification$notificationType, "for award", x , "\n this award will not be included in the project section."), call. = FALSE) + t <- NULL + } + else if (length(t$response$award) == 0){ + warning(paste("Empty result for award", x, "\n this award will not be included in the project section."), call. 
= FALSE) + t <- NULL } + else t + }) - # otherEntity/physical - physical <- new("physical") - physical@scope <- new("xml_attribute", "document") - physical@objectName <- new("objectName", file_name) + i <- lapply(result, function(x) {!is.null(x)}) + result <- result[unlist(i)] + award_nums <- award_nums[unlist(i)] - physical@size <- new("size", format(file.size(path), scientific = FALSE)) - physical@size@unit <- new("xml_attribute", "bytes") - physical@authentication <- new("ListOfauthentication", list(new("authentication", digest::digest(path, algo = "sha1", file = TRUE)))) - physical@authentication[[1]]@method <- new("xml_attribute", "SHA-1") + if (length(award_nums) == 0){ + stop(call. = F, + "No valid award numbers were found.") + } - physical@dataFormat <- new("dataFormat") - physical@dataFormat@externallyDefinedFormat <- new("externallyDefinedFormat") - physical@dataFormat@externallyDefinedFormat@formatName <- format_id + co_pis <- lapply(result, function(x){ + extract_name(x$response$award$coPDPI) + }) - physical@distribution <- new("ListOfdistribution", list(new("distribution"))) - physical@distribution[[1]]@scope <- new("xml_attribute", "document") - physical@distribution[[1]]@online <- new("online") - physical@distribution[[1]]@online@url <- new("url", paste0(resolve_base, pid)) + co_pis <- unlist(co_pis, recursive = F) + co_pis <- do.call("rbind", co_pis) + if (!is.null(co_pis)){ + co_pis$role <- "coPrincipalInvestigator" + } - slot(physical@distribution[[1]]@online@url, "function") <- new("xml_attribute", "download") + pis <- lapply(result, function(x){ + extract_name(x$response$award$pdPIName) + }) - entity@physical <- new("ListOfphysical", list(physical)) + pis <- unlist(pis, recursive = F) + pis <- do.call("rbind", pis) %>% + dplyr::mutate(role = "principalInvestigator") - entity + people <- dplyr::bind_rows(co_pis, pis) %>% + dplyr::distinct() + + p_list <- list() + for (i in 1:nrow(people)){ + p_list[[i]] <- eml_personnel(given_names = 
people$firstName[i], + sur_name = people$lastName[i], + role = people$role[i]) } - # Create new entities - new_entities <- lapply(entity_types, function(type) { - lapply(which(entities$type == type), function(i) { - eml_entity(entities[i, "type"], - entities[i,"path"], - entities[i,"pid"], - entities[i,"format_id"]) - }) + titles <- lapply(result, function(x){ + unlist(x$response$award$title) }) - names(new_entities) <- entity_types # Name the list so we can [[ by type - - # Merge new entities into existing - for (type in entity_types) { - slot(doc@dataset, type) <- new(paste0("ListOf", type), c(slot(doc@dataset, type), - new(paste0("ListOf", type), new_entities[[type]]))) + if (eml_version %in% c("2.1", "2.1.1")){ + award_nums <- paste("NSF", award_nums) + proj <- eml_project(title = titles, personnelList = p_list, funding = award_nums) } + else if (eml_version %in% c("2.2", "2.2.0")){ + awards <- list() + + for (i in 1:length(award_nums)){ + awards[[i]] <- list(title = titles[i], + funderName = "National Science Foundation", + funderIdentifier = "https://doi.org/10.13039/00000001", + awardNumber = award_nums[i], + awardUrl = paste0("https://www.nsf.gov/awardsearch/showAward?AWD_ID=", award_nums[i])) + } - doc + proj <- list(title = titles, personnel = p_list, award = awards) + return(proj) + } } +# Extract first and last name from NSF API results +# +# The NSF API jams the first name, last name, and middle initial if it exists into a single string. +# This simple helper uses some regex to split the names up. 
+extract_name <- function(x){ + lapply(x, function(x) { + data.frame( + firstName = trimws(stringr::str_extract(x, "[A-Za-z]{2,}\\s[A-Z]?")), + lastName = trimws(gsub("[A-Za-z]{2,}\\s[A-Z]?", "", x)), + stringsAsFactors = F)}) +} diff --git a/R/environment.R b/R/environment.R index 3ee6ac1..ece3170 100644 --- a/R/environment.R +++ b/R/environment.R @@ -1,10 +1,9 @@ -#' environment.R -#' Author: Bryce Mecum -#' -#' Functions related to loading configuriation based upon the environment -#' the code is being run under. +# Functions related to loading configuriation based upon the environment +# the code is being run under. +#' Get the current environment name +#' #' Get the current environment name. #' #' @return (character) The environment name. @@ -20,6 +19,9 @@ env_get <- function() { env } + +#' Load environmental variables from a YAML-formatted environment file +#' #' Load environmental variables from a YAML-formatted environment file. #' #' This file should be formatted in the following way: @@ -34,10 +36,15 @@ env_get <- function() { #' @param skip_mn (logical) Optional. Skip contacting the MNode and filling in the $mn element of the environment. #' #' @return (list) A list of name-value pairs. -#' @export #' -#' @examples +#' @noRd env_load <- function(name=NULL, path=NULL, skip_mn=FALSE) { + if (!requireNamespace("yaml")) { + stop(call. = FALSE, + "The package 'yaml' must be installed to run this function. ", + "Please install it and try again.") + } + # Determine the environment to load if (is.null(name)) { name <- env_get() diff --git a/R/formats.R b/R/formats.R new file mode 100644 index 0000000..263f86a --- /dev/null +++ b/R/formats.R @@ -0,0 +1,273 @@ +# Functions related to data object formats + + +#' Get the list of valid formats from DataONE +#' +#' Note that this function is intended to return even if the request to the CN +#' fails. This is so other functions can call continue even if the request +#' fails. 
+#' +#' @param url (character) The listFormats endpoint. Defaults to the production CN. +#' +#' @return (character) A vector of formats. +#' +#' @noRd +get_formats <- function(url = "https://cn.dataone.org/cn/v2/formats") { + req <- httr::GET(url) + + if (httr::status_code(req) != 200) { + warning(paste0("Failed to load an up-to-date list of format IDs from ", url, " because the request to the CN failed. Checking of format IDs is disabled.")) + return(vector("character")) + } + + formats_content <- httr::content(req, encoding = "UTF-8") + format_id_nodes <- xml2::xml_find_all(formats_content, "//formatId") + + if (length(format_id_nodes) == 0) { + return(vector("character")) + } + + vapply(format_id_nodes, function(x) { + xml2::xml_text(x) + }, + "") +} + + +#' Check that the given format is valid +#' +#' Check that the given format is valid. Validity is determined by the given format +#' being found in the list on . +#' +#' @param format (character) The format ID to check. +#' +#' @return (logical) Whether or not the format was valid. +#' +#' @noRd +check_format <- function(format) { + formats <- get_formats() + + if (!(format %in% formats)) + stop(call. = FALSE, + paste0("The provided format_id of '", + format, + "' is not a valid format ID. Check what you entered against ", + "the list of format IDs on ", + "https://cn.dataone.org/cn/v2/formats.")) + + invisible(TRUE) +} + + +#' Guess format from filename +#' +#' Guess format from filename for a vector of filenames. +#' +#' @param filenames (character) A vector of filenames. +#' +#' @return (character) DataONE format IDs. 
+#' +#' @export +#' +#' @examples +#' formatid <- guess_format_id("temperature_data.csv") +guess_format_id <- function(filenames) { + extensions <- tolower(tools::file_ext(filenames)) + filetypes <- vector(mode = "character", length = length(extensions)) + + for (i in seq_len(length(extensions))) { + extension <- extensions[i] + + if (extension %in% names(dataone_format_mappings)) { + filetypes[i] <- dataone_format_mappings[extension][[1]] + } else { + filetypes[i] <- "application/octet-stream" + } + } + + filetypes +} + + +# List of DataONE formats used in guess_format_id() +dataone_format_mappings <- list("avi" = "video/avi", + "bmp" = "image/bmp", + "bz2" = "application/x-bzip2", + "csv" = "text/csv", + "doc" = "application/msword", + "docx" = "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "fasta" = "application/x-fasta", + "gif" = "image/gif", + "gz" = "application/x-gzip", + "html" = "text/html", + "ipynb" = "application/json", + "jp2" = "image/jp2", + "jpg" = "image/jpeg", + "jpeg" = "image/jpeg", + "kml" = "application/vnd.google-earth.kml/xml", + "kmz" = "application/vnd.google-earth.kmz", + "md" = "text/markdown", + "mov" = "video/quicktime", + "mp3" = "audio/mpeg", + "mp4" = "video/mp4", + "mpg" = "video/mpeg", + "mpeg" = "video/mpeg", + "n3" = "text/n3", + "nc" = "netCDF-3", + "pdf" = "application/pdf", + "png" = "image/png", + "ppt" = "application/vnd.ms-powerpoint", + "pptx" = "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "py" = "application/x-python", + "qt" = "video/quicktime", + "r" = "application/R", + "rar" = "application/x-rar-compressed", + "rdf" = "application/rdf/xml", + "rmd" = "text/x-rmarkdown", + "sas" = "application/SAS", + "svg" = "image/svg/xml", + "tar" = "application/x-tar", + "tif" = "image/tiff", + "tiff" = "image/tiff", + "ttl" = "text/turtle", + "tsv" = "text/tsv", + "txt" = "text/plain", + "wav" = "audio/x-wav", + "wma" = "audio/x-ms-wma", + "wmv" = "video/x-ms-wmv", 
+ "xls" = "application/vnd.ms-excel", + "xlsx" = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xml" = "application/xml", + "zip" = "application/zip") + + +#' Determine the format ID for a NetCDF file +#' +#' Determine the DataONE format ID for a NetCDF file provided by path. +#' +#' @param path (character) Full or relative path to the file in question. +#' +#' @return (character) The DataONE format ID. +#' +#' @noRd +get_netcdf_format_id <- function(path) { + stopifnot(is.character(path), + nchar(path) > 0, + file.exists(path)) + + if (!requireNamespace("ncdf4")) { + stop(call. = FALSE, + "The package 'ncdf4' must be installed to run this function. ", + "Please install it and try again.") + } + + # Try to open the file, capturing errors + cdf_file <- try({ + ncdf4::nc_open(path) + }) + + # If we failed to open the file, we can assume it's not a valid NetCDF file + # and we just return application/octet-stream as the format ID + if (inherits(cdf_file, "try-error")) { + return("application/octet-stream") + } + + # Since we got this far, continue detecting the format + stopifnot("format" %in% names(cdf_file)) + format_string <- cdf_file$format + stopifnot(is.character(format_string), + nchar(format_string) > 0) + format_id = "" + + if (format_string == "NC_FORMAT_CLASSIC") { + format_id = "netCDF-3" + } else if (format_string == "NC_FORMAT_NETCDF4") { + format_id = "netCDF-4" + } else { + stop("Unknown NetCDF format discovered.") + } + + return(format_id) +} + + +#' Test whether an object has a particular format ID +#' +#' Test whether an object has a particular format ID. +#' +#' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. +#' @param pids (character) The PID(s) for objects. +#' @param format_id (character) The format IDs. 
+#' +#' @return (logical) +#' +#' @noRd +is_format_id <- function(node, pids, format_id) { + stopifnot(class(node) %in% c("MNode", "CNode")) + stopifnot(all(is.character(pids)), + all(lengths(pids) > 0)) + stopifnot(is.character(format_id), + nchar(format_id) > 0) + + result <- vector("logical", length(pids)) + + for (i in seq_along(pids)) { + result[i] <- dataone::getSystemMetadata(node, pids[i])@formatId == format_id + } + + result +} + + +# The following are a set of thin functions which return the DataONE format ID string. +# These are to aid in filling in function arguments and can't remember or don't want to +# type in the full format ID. By putting these format ID strings into +# functions, a user's autocompletion routine in their editor can help them +# fill in the format ID they want. + + +#' Generate the ISO 19139 format ID +#' +#' Returns the ISO 19139 format ID. +#' +#' @return (character) The format ID for ISO 19139. +#' +#' @export +#' +#' @examples +#' format_iso() +#' \dontrun{ +#' # Upload a local ISO19139 XML file: +#' env <- env_load() +#' publish_object(env$mn, "path_to_some_EML_file", format_iso()) +#' } +format_iso <- function() { + "http://www.isotc211.org/2005/gmd" +} + + +#' Generate the EML 2.1.1 format ID +#' +#' Returns the EML 2.1.1 format ID. +#' @param version The version of EML ('2.1.1' or '2.2.0') +#' +#' @return (character) The format ID for EML 2.1.1. 
+#' +#' @export +#' +#' @examples +#' format_eml("2.1.1") +#' \dontrun{ +#' # Upload a local EML 2.1.1 file: +#' env <- env_load() +#' publish_object(env$mn, "path_to_some_EML_file", format_eml("2.1")) +#' } +format_eml <- function(version) { + if (version %in% c("2.1","2.1.1", "1", 1)){ + "eml://ecoinformatics.org/eml-2.1.1" + } + else if (version %in% c("2.2","2.2.0", "2", 2)){ + "https://eml.ecoinformatics.org/eml-2.2.0" + } + else print("Please specify a recognized version name, either '2.1.1' or '2.2.0'") +} diff --git a/R/helpers.R b/R/helpers.R index 32148c0..47006f7 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -1,23 +1,55 @@ -#' helpers.R -#' -#' Various helper functions for things like testing the package. +# Various helper functions for things like testing a package -#' Create a test metadata object. +#' Create a test metadata object +#' +#' Create a test EML metadata object. #' #' @param mn (MNode) The Member Node. #' @param data_pids (character) Optional. PIDs for data objects the metadata documents. #' +#' @return (character) The PID of the published metadata document. 
+#' #' @export -create_dummy_metadata <- function(mn, data_pids=NULL) { +#' +#' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- create_dummy_metadata(mn) +#' } +create_dummy_metadata <- function(mn, data_pids = NULL) { + + # Make sure the node is not a production node + if (mn@env == "prod") { + stop('Can not create dummy metadata on production node.') + } + pid <- paste0("urn:uuid:", uuid::UUIDgenerate()) me <- get_token_subject() # Copy the original EML file to a temporary place original_file <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") + doc <- read_eml(original_file) + + if (is.null(data_pids)){ + doc$dataset$otherEntity <- NULL + } + else if (!is.null(data_pids)){ + oe <- list() + for (i in 1:length(data_pids)){ + oe[[i]] <- list(entityName = "dummy_object", + entityDescription = data_pids[i], + entityType = "text/plain") + } + + doc$dataset$otherEntity <- oe + } + metadata_file <- tempfile() - file.copy(original_file, metadata_file) + write_eml(doc, metadata_file) sysmeta <- new("SystemMetadata", id = pid, @@ -36,7 +68,7 @@ create_dummy_metadata <- function(mn, data_pids=NULL) { sysmeta <- add_admin_group_access(sysmeta) sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") - log_message(paste0("Creating metadata ", pid)) + message(paste0("Creating metadata ", pid)) pid <- dataone::createObject(mn, pid, metadata_file, sysmeta) # Remove the temporary EML File @@ -45,15 +77,32 @@ create_dummy_metadata <- function(mn, data_pids=NULL) { pid } -#' Create a test object. + +#' Create a test object +#' +#' Create a test data object. #' #' @param mn (MNode) The Member Node. #' -#' @return +#' @return (character) The PID of the dummy object. 
+#' #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' pid <- create_dummy_object(mn) +#'} create_dummy_object <- function(mn) { + + # Make sure the node is not a production node + if (mn@env == "prod") { + stop('Can not create dummy object on production node.') + } + pid <- paste0("urn:uuid:", uuid::UUIDgenerate()) me <- get_token_subject() tmp <- tempfile() @@ -77,7 +126,7 @@ create_dummy_object <- function(mn) { sysmeta <- add_admin_group_access(sysmeta) sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") - log_message(paste0("Creating object ", pid)) + message(paste0("Creating object ", pid)) create_response <- dataone::createObject(mn, pid, tmp, sysmeta) file.remove(tmp) @@ -85,16 +134,33 @@ create_dummy_object <- function(mn) { create_response } -#' Create a test package. + +#' Create a test package +#' +#' Create a test data package. #' #' @param mn (MNode) The Member Node. -#' @param size (numeric) The number of files in the package. +#' @param size (numeric) The number of files in the package, including the metadata file. +#' +#' @return (list) The PIDs for all elements in the data package. 
#' -#' @return #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' #Create dummy package with 5 data objects and 1 metadata object +#' pids <- create_dummy_package(mn, 6) +#' } create_dummy_package <- function(mn, size = 2) { + + # Make sure the node is not a production node + if (mn@env == "prod") { + stop('Can not create dummy package on production node.') + } + me <- get_token_subject() # Data objects @@ -134,7 +200,7 @@ create_dummy_package <- function(mn, size = 2) { sysmeta <- add_admin_group_access(sysmeta) sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") - log_message(paste0("Creating resource map ", pid)) + message(paste0("Creating resource map ", pid)) resource_map_pid <- dataone::createObject(mn, pid, resmap_path, sysmeta) list(metadata = meta_pid, @@ -142,16 +208,35 @@ create_dummy_package <- function(mn, size = 2) { data = data_pids) } -#' Create a test parent package. + +#' Create a test parent package +#' +#' Create a test parent data package. #' #' @param mn (MNode) The Member Node. #' @param children (character) Child package (resource maps) PIDs. #' -#' @return +#' @return (list) The resource map PIDs for both the parent and child packages. 
+#' #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +# cn <- CNode("STAGING2") +# mn <- getMNode(cn,"urn:node:mnTestKNB") +# +# child_pid <- "urn:uuid:39a59f99-118b-4c81-9747-4b6c43308e00" +# +# create_dummy_parent_package(mn, child_pid) +#'} create_dummy_parent_package <- function(mn, children) { + + # Make sure the node is not a production node + if (mn@env == "prod") { + stop('Can not create dummy parent package on production node.') + } + me <- get_token_subject() meta_pid <- create_dummy_metadata(mn) @@ -178,9 +263,453 @@ create_dummy_parent_package <- function(mn, children) { sysmeta <- add_admin_group_access(sysmeta) sysmeta <- datapack::addAccessRule(sysmeta, "public", "read") - log_message(paste0("Creating parent package map ", pid)) + message(paste0("Creating parent package map ", pid)) create_response <- createObject(mn, pid, resmap_path, sysmeta) list(parent = create_response, children = children) } + + +#' Create test attributes data.frame +#' +#' Create a test data.frame of attributes. +#' +#' @param numberAttributes (integer) Number of attributes to be created in the table. +#' @param factors (character) Optional vector of factor names to include. +#' +#' @return (data.frame) A data.frame of attributes. 
+#' +#' @export +#' +#' @examples +#' \dontrun{ +#' # Create dummy attribute dataframe with 6 attributes and 1 factor +#' attributes <- create_dummy_attributes_dataframe(6, c("Factor1", "Factor2")) +#' } +create_dummy_attributes_dataframe <- function(numberAttributes, factors = NULL) { + names <- vapply(seq_len(numberAttributes), function(x) { paste0("Attribute ", x)}, "") + domains <- rep("textDomain", numberAttributes) + + if(!is.null(factors)) { + domains <- c(rep("textDomain", numberAttributes - length(factors)), + rep("enumeratedDomain", length(factors))) + names[seq((numberAttributes - length(factors) + 1), numberAttributes)] <- factors + } + + attributes <- data.frame(attributeName = names, + attributeDefinition = names, + measurementScale = rep("nominal", numberAttributes), + domain = domains, + formatString = rep(NA, numberAttributes), + definition = names, + unit = rep(NA, numberAttributes), + numberType = rep(NA, numberAttributes), + missingValueCode = rep(NA, numberAttributes), + missingValueCodeExplanation = rep(NA, numberAttributes), + stringsAsFactors = FALSE) + + attributes +} + + +#' Create test enumeratedDomain data.frame +#' +#' Create a test data.frame of enumeratedDomains. +#' +#' @param factors (character) Vector of factor names to include. +#' +#' @return (data.frame) A data.frame of factors. +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' # Create dummy dataframe of 2 factors/enumerated domains +#' attributes <- create_dummy_enumeratedDomain_dataframe(c("Factor1", "Factor2")) +#' } +create_dummy_enumeratedDomain_dataframe <- function(factors) { + names <- rep(factors, 4) + enumeratedDomains <- data.frame(attributeName = names, + code = paste0(names, seq_along(names)), + definition = names) + + enumeratedDomains +} + + +#' Create dummy package with fuller metadata +#' +#' Creates a fuller package than [create_dummy_package()] +#' but is otherwise based on the same concept. 
This dummy +#' package includes multiple data objects, responsible parties, +#' geographic locations, method steps, etc. +#' +#' @param mn (MNode) The Member Node. +#' @param title (character) Optional. Title of package. Defaults to "A Dummy Package". +#' +#' @return (list) The PIDs for all elements in the data package. +#' +#' @import EML +#' @import dataone +#' +#' @export +create_dummy_package_full <- function(mn, title = "A Dummy Package") { + stopifnot(is(mn, "MNode")) + stopifnot(is.character(title), nchar(title) > 0) + if (mn@env == "prod") { + stop("Cannot create dummy package on production node.") + } + + # Create objects + file.create(c("dummy1.csv", "dummy2.csv", "dummy1.jpg", "dummy1.R")) + # TODO: add actual data to dummy files + + pid_csv1 <- publish_object(mn, + path = "dummy1.csv", + format_id = "text/csv") + + pid_csv2 <- publish_object(mn, + path = "dummy2.csv", + format_id = "text/csv") + + pid_jpg1 <- publish_object(mn, + path = "dummy1.jpg", + format_id = "image/jpeg") + + pid_R1 <- publish_object(mn, + path = "dummy1.R", + format_id = "application/R") + + unlink(c("dummy1.csv", "dummy2.csv", "dummy1.jpg", "dummy1.R")) + + data_pids <- c(pid_csv1, pid_csv2, pid_jpg1, pid_R1) + + # Import EML + eml_path_original <- file.path(system.file(package = "arcticdatautils"), "example-eml-full.xml") + doc <- EML::read_eml(eml_path_original) + + # Add objects to EML + doc$dataset$title <- title + + attr <- data.frame( + attributeName = c("Date", "Location", "Salinity", "Temperature"), + attributeDefinition = c("Date sample was taken on", "Location code representing location where sample was taken", "Salinity of sample in PSU", "Temperature of sample"), + measurementScale = c("dateTime", "nominal","ratio", "interval"), + domain = c("dateTimeDomain", "enumeratedDomain","numericDomain", "numericDomain"), + formatString = c("MM-DD-YYYY", NA, NA, NA), + definition = c(NA,NA, NA, NA), + unit = c(NA, NA, "dimensionless", "celsius"), + numberType = c(NA, NA, 
"real", "real"), + missingValueCode = c(NA, NA, NA, NA), + missingValueCodeExplanation = c(NA, NA, NA, NA), + stringsAsFactors = FALSE) + + location <- c(CASC = "Cascade Lake", CHIK = "Chikumunik Lake", HEAR = "Heart Lake", NISH = "Nishlik Lake") + fact <- data.frame(attributeName = "Location", code = names(location), definition = unname(location)) + + attributeList <- EML::set_attributes(attributes = attr, factors = fact) + + dT1 <- pid_to_eml_entity(mn, + pid = pid_csv1, + entity_type = "dataTable") + dT1$attributeList <- attributeList + + dT2 <- pid_to_eml_entity(mn, + pid = pid_csv2, + entity_type = "dataTable") + dT2$attributeList <- attributeList + + doc$dataset$dataTable <- list(dT1, dT2) + + oE1 <- pid_to_eml_entity(mn, + pid = pid_jpg1, + entity_type = "otherEntity") + + oE2 <- pid_to_eml_entity(mn, + pid = pid_R1, + entity_type = "otherEntity") + + doc$dataset$otherEntity <- list(oE1, oE2) + + eml_path <- tempfile(fileext = ".xml") + EML::write_eml(doc, eml_path) + + pid_eml <- publish_object(mn, + path = eml_path, + format_id = "eml://ecoinformatics.org/eml-2.1.1") + + # Create resource map + resource_map_pid <- create_resource_map(mn, + metadata_pid = pid_eml, + data_pids = data_pids) + + file.remove(eml_path) + + return(list(resource_map = resource_map_pid, + metadata = pid_eml, + data = data_pids)) +} + +#' Retrieve a name from an ORCID URL +#' +#' Retrieve first and last name from an ORCID URL. +#' +#' @param orcid_url (character) A valid ORCID URL address. +#' +#' @return (character) First and last name. 
+#' +#' @export +#' +#' @examples +#' \dontrun{ +#' pi_name <- get_orcid_name('https://orcid.org/0000-0002-2561-5840') +#' } + +get_orcid_name <- function(orcid_url) { + req <- httr::GET(paste0(orcid_url, "/person.json")) + if (req$status_code != 200) { + stop('Failed to read in ', orcid_url) + } + json <- httr::content(req) + + display_name <- json$displayName + + if (is.null(display_name)){ + display_name <- NA + } + return(display_name) +} + +#' Retrieve an email address from an ORCID URL +#' +#' Retrieve public email addresses from an ORCID URL. +#' +#' @param orcid_url (character) A valid ORCID URL address. +#' +#' @return (character) Public e-mail addresses. +#' @export +#' +#' +#' @examples +#' \dontrun{ +#' pi_email <- get_orcid_email('https://orcid.org/0000-0002-2561-5840') +#' } + +get_orcid_email <- function(orcid_url) { + req <- httr::GET(paste0(orcid_url, "/person.json")) + if (req$status_code != 200) { + stop('Failed to read in ', orcid_url) + } + json <- httr::content(req) + email_list <- eml_get_simple(json$publicGroupedEmails, "email") %>% paste0(., collapse = ";") + if (is.null(email_list)){ + email_list <- NA + } + return(email_list) +} + + +#' List recent submissions to a DataOne Member Node +#' +#' List recent submissions to a DataOne Member Node from all submitters not present +#' in the administrator whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org +#' +#' @param mn (MNode) A DataOne Member Node +#' @param from (character) the date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} +#' @param to (character) the date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()} +#' @param formatType (character) the format of objects to query. Must be one of: RESOURCE, METADATA, DATA, or *. 
+#' @param use_whitelist (logical) Whether to filter out ADC admins, as listed at: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org +#' +#' @export +#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @examples +#' \dontrun{ +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' +#' View(arcticdatautils::list_submissions(adc, '2018-10-01', '2018-10-07')) +#' +#' # Return all submitted objects in the past month for the 'adc' node: +#' library(lubridate) +#' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*')) +#' +#' # Return all submitted objects except for one user +#' library(lubridate) +#' View(list_submissions(adc, Sys.Date() %m+% months(-1), Sys.Date(), '*'), +#' whitelist = 'http://orcid.org/0000-0002-2561-5840') +#' +#' } +list_submissions <- function(mn, from = Sys.Date(), to = Sys.Date(), formatType = '*', + use_whitelist = T) { + if (!requireNamespace('lubridate', "purrr", 'RCurl')) { + stop(call. = FALSE, + 'The packages "lubridate", "purrr, and "RCurl" must be installed to run this function. ', + 'Please install them and try again.') + } + stopifnot(methods::is(mn, 'MNode')) + if (!is_token_set(mn)) { + stop('No token set') + } + if (!(lubridate::is.Date(as.Date(from, '%Y/%M/%D')))){ + stop('"from" argument must be in YYYY/MM/DD format') + } + if (!(lubridate::is.Date(as.Date(to, '%Y/%M/%D')))){ + stop('"to" argument must be in YYYY/MM/DD format') + } + if (!(formatType %in% c('RESOURCE', 'METADATA', 'DATA', '*'))) { + stop('formatType must be one of: RESOURCE, METADATA, DATA, or *') + } + if (as.Date(from) > as.Date(to)){ + stop('"from" date must be after "to" date') + } + + req <- httr::GET('https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org') + if(req$status_code != 200) { + warning('Failed to read in', whitelist, '. 
Results will include admin submissions / edits.') + } + whitelist <- httr::content(req, "text") + + # Construct query and return results + q = sprintf('dateUploaded:["%sT00:00:00Z" TO "%sT23:59:59Z"] AND formatType:%s', from, to, formatType) + results <- dataone::query(mn, list(q = q, + fl = "identifier AND submitter AND dateUploaded AND formatType AND fileName", + rows = 10000), + as = "data.frame") + if (use_whitelist == T){ + # Filter out rows where the submitter is in the whitelist + results <- results[-which(stringr::str_detect(whitelist, results$submitter)),] + } + + # Return full names based on orcid Id + results$submitter_name <- purrr::map(results$submitter, get_orcid_name) %>% unlist() + results$submitter_email <- purrr::map(results$submitter, get_orcid_email) %>% unlist() + + # Arrange by dateUploaded + results <- dplyr::arrange(results, dateUploaded) + + return(results) +} + +#' Read a shapefile from a pid +#' +#' Read a shapefile from a pid that points to the zipped directory of the shapefile and associated files +#' on a given member node. +#' +#' @param mn (MNode) A DataOne Member Node +#' @param pid (character) An object identifier +#' +#' @return shapefile (sf) The shapefile as an `sf` object +#' +#' @export +#' +#' @author Jeanette Clark jclark@@nceas.ucsb.edu +#' +#' @examples +#' \dontrun{ +#' cn <- dataone::CNode('PROD') +#' adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +#' pid <- "urn:uuid:294a365f-c0d1-4cc3-a508-2e16260aa70c" +#' +#' shapefile <- read_zip_shapefile(adc, pid) +#' } +read_zip_shapefile <- function(mn, pid){ + + stopifnot(methods::is(mn, 'MNode')) + stopifnot(is.character(pid)) + + if (!requireNamespace("sf")) { + stop(call. = FALSE, + "The package 'sf' must be installed to run this function. 
", + "Please install it and try again.") + } + + temp <- tempfile() + writeBin(dataone::getObject(mn, pid), temp) + zip_contents <- utils::unzip(temp, exdir = tempfile()) + + if (length(grep("shp", tools::file_ext(zip_contents))) != 1){ + stop("Zipped directory must contain one and only one .shp file") + } + + shapefile <- sf::st_read(zip_contents[grep("shp", tools::file_ext(zip_contents))], quiet = T, stringsAsFactors = F) + unlink(temp) + return(shapefile) +} + + +#' Recovers failed submissions +#' +#' Recovers failed submissions and write the new, valid EML to a given path +#' +#' @param node (MNode) The Member Node to publish the object to. +#' @param pid The PID of the EML metadata document to be recovered. +#' @param path path to write XML. +#' +#' @return recovers and write the valid EML to the indicated path +#' +#' @export +#' +#' @author Rachel Sun rachelsun@ucsb.edu +#' +#' @examples +#' \dontrun{ +#' # Set environment +#' cn <- dataone::CNode("STAGING2") +#' mn <- dataone::getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "urn:uuid:b1a234f0-eed5-4f58-b8d5-6334ce07c010" +#' path <- tempfile("file", fileext = ".xml") +#' recover_failed_submission(mn, pid, path) +#' eml <- EML::read_eml(path) +#'} +recover_failed_submission <- function(node, pid, path){ + stopifnot(is(node, "MNode")) + stopifnot(is.character(pid), nchar(pid) > 0, arcticdatautils::object_exists(node, pid)) + + convert_to_text <- dataone::getObject(node, pid) %>% + rawToChar() + remove_error_tag <- paste0(convert_to_text, collapse = "") %>% + stringr::str_remove(".*`") %>% + stringr::str_remove("EML draft.*`") %>% + stringr::str_remove_all(" ") %>% + stringr::str_trim() + + doc <- EML::read_eml(remove_error_tag) + EML::eml_validate(doc) + EML::write_eml(doc, path) +} + +#' Add prov to a dummy package +#' +#' Adds provenance information to a dummy package for testing +#' +#' @param mn member node (the ADC test node) +#' @param rm_pid resource map identifier +#' +add_dummy_prov <- function(mn, 
rm_pid){ + if (mn@env == "prod") { + stop("Cannot create dummy prov on a production node.") + } + if (mn@identifier == "urn:node:mnTestARCTIC"){ + d1c <- dataone::D1Client("STAGING", "urn:node:mnTestARCTIC") + } + else if (mn@identifier != "urn:node:mnTestARCTIC"){ + stop("Use the ADC test node") + } + + pkg <- getDataPackage(d1c, id=rm_pid, lazyLoad=TRUE, limit="0MB", quiet=T) + objs <- selectMember(pkg, name="sysmeta@fileName", value='dummy_object') + if (length(objs) < 2){ + stop(.call = FALSE, + "Dummy package must have at least 2 data objects") + } + sourceObjId <- selectMember(pkg, name="sysmeta@fileName", value='dummy_object')[[1]] + outputObjId <- selectMember(pkg, name="sysmeta@fileName", value='dummy_object')[[2]] + + pkg <- describeWorkflow(pkg, sources=sourceObjId, derivations=outputObjId) + + resmapId_new <- uploadDataPackage(d1c, pkg, public = TRUE, quiet = FALSE) +} + diff --git a/R/inserting.R b/R/inserting.R index 702355e..ba9fd43 100644 --- a/R/inserting.R +++ b/R/inserting.R @@ -1,9 +1,7 @@ -#' inserting.R -#' -#' A set of utilities for inserting packages from files and folders on disk. +# A set of utilities for inserting packages from files and folders on disk -#' Create a package from a folder containing an ISO package (legacy) +#' Create a package from a folder containing an ISO package #' #' This function handles the process of inserting the original ISO package #' and updating it with an EML package. @@ -12,12 +10,12 @@ #' #' @param mn (MNode) The Member Node to create the packages on. #' @param path (character) The path to the folder containing the files. -#' @param data_pids (character) Optional. Manually specify the PIDs of data. This is useful if data were inserted outside this function and you want to re-use those objects. +#' @param data_pids (character) Optional. Manually specify the PIDs of data objects. +#' This is useful if data were inserted outside this function and you want to re-use those objects. 
#' -#' @return (list) All of the PIDs created. -#' @export +#' @return (list) A list of the PIDs created. #' -#' @examples +#' @noRd create_from_folder <- function(mn, path, data_pids=NULL) { # Validate args stopifnot(file.exists(path)) @@ -47,7 +45,7 @@ create_from_folder <- function(mn, path, data_pids=NULL) { publish_object(mn, data_path, data_format_ids[data_path]) }, error = function(e) { - log_message(e) + message(e) e }) @@ -65,10 +63,10 @@ create_from_folder <- function(mn, path, data_pids=NULL) { stopifnot(EML::eml_validate(eml_path)) eml_package <- publish_update(mn, - metadata_old_pid = iso_pid, - resmap_old_pid = iso_resmap_pid, - data_old_pids = data_pids, - metadata_file_path = eml_path) + metadata_pid = iso_pid, + resource_map_pid = iso_resmap_pid, + data_pids = data_pids, + metadata_path = eml_path) list(iso_pid = iso_pid, iso_resource_map_pid = iso_resmap_pid, diff --git a/R/interactive.R b/R/interactive.R index fac5d55..a780cac 100644 --- a/R/interactive.R +++ b/R/interactive.R @@ -1,8 +1,11 @@ -#' interactive.R -#' Author: Bryce Mecum -#' -#' Functions for interactive viewing of the Inventory and other objects. +# Functions for interactive viewing of the inventory and other objects + +#' View packages +#' +#' @param inventory (character) An inventory. 
+#' +#' @noRd view_packages <- function(inventory) { stopifnot(is.data.frame(inventory), nrow(inventory) > 0) @@ -25,6 +28,7 @@ view_packages <- function(inventory) { } +# Helper function for view_packages() wait_for_key <- function() { response <- readline(prompt = "Press [S]top or [C]ontinue") response <- tolower(response) @@ -32,6 +36,7 @@ wait_for_key <- function() { } +# Helper function for view_packages() show_package <- function(inventory, package) { cat(paste0("Package: ", package, "\n")) diff --git a/R/inventory.R b/R/inventory.R index a0e9a3c..00d7622 100644 --- a/R/inventory.R +++ b/R/inventory.R @@ -1,25 +1,21 @@ -#' inventory.R -#' Author: Bryce Mecum -#' -#' Functions relating to keeping up an inventory of files that exist on the KNB -#' and may or may not be copied to another computer and untarred. -#' +# Functions relating to keeping up an inventory of files that exist on the KNB +# and may or may not be copied to another computer and untarred -#' Create an empty inventory data.frame. This doesn't need to be a function -#' but I'm making it one in case the initialization routine becomes more -#' complicated. +#' Create an empty inventory data.frame #' -#' @return An empty data frame -#' @export +#' @return (data.frame) An empty data.frame. #' -#' @examples +#' @noRd inv_init <- function() { inventory <- data.frame(stringsAsFactors = FALSE) inventory } + +#' Load files into the inventory from a text file +#' #' Load files into the inventory from a text file. #' #' Files should be the output of the command: @@ -27,14 +23,12 @@ inv_init <- function() { #' you@server:/path/to/acadis$ find . -type f #' #' @param path (character) Path to a file containing a file listing. -#' @param inventory (character) A \code{data.frame}. -#' @param filter (logical) Filter out versioned datasets. Default is TRUE. +#' @param inventory (character) A data.frame. +#' @param filter (logical) Whether or not to filter out versioned datasets. 
#' -#' @return An inventory (data.frame) -#' -#' @export +#' @return (data.frame) An inventory. #' -#' @examples +#' @noRd inv_load_files <- function(inventory, path, filter=TRUE) { stopifnot(file.exists(path)) stopifnot("inventory" %in% ls(), @@ -60,7 +54,7 @@ inv_load_files <- function(inventory, path, filter=TRUE) { # Filter out versioned datasets if (filter) { size_before <- nrow(files) - files <- files[grep("v_\\d\\.\\d", files$file, invert = TRUE), "file", drop=FALSE] + files <- files[grep("v_\\d\\.\\d", files$file, invert = TRUE), "file", drop = FALSE] size_diff <- size_before - nrow(files) if (size_diff > 0) { cat("Removed", size_diff, "file(s) that were part of versioned datasets.\n") } @@ -100,17 +94,17 @@ inv_load_files <- function(inventory, path, filter=TRUE) { inventory } -#' Load file sizes into an inventory from a text file. Removes the column -#' 'size_bytes' from inventory before doing a left join. + +#' Load file sizes into an inventory from a text file #' -#' @param path (character) Path to a file containing sizes. -#' @param (data.frame) inventory A \code{data.frame}. +#' Removes the column 'size_bytes' from inventory before doing a left join. #' -#' @return (data.frame) An inventory +#' @param path (character) Path to a file containing sizes. +#' @param inventory (data.frame) A data.frame. #' -#' @export +#' @return (data.frame) An inventory. #' -#' @examples +#' @noRd inv_load_sizes <- function(inventory, path) { stopifnot(file.exists(path)) stopifnot("inventory" %in% ls(), @@ -141,18 +135,18 @@ inv_load_sizes <- function(inventory, path) { inventory } -#' Load checksums into the inventory file from a text file. This function -#' removes the column 'checksum_sha256' from inventory before doing a + +#' Load checksums into the inventory file from a text file +#' +#' This function removes the column 'checksum_sha256' from inventory before doing a #' left join. #' #' @param path (character) Path to a file containing sizes. 
#' @param inventory (data.frame) An inventory. #' -#' @return An inventory (data.frame) -#' -#' @export +#' @return (data.frame) An inventory. #' -#' @examples +#' @noRd inv_load_checksums <- function(inventory, path) { stopifnot(file.exists(path)) stopifnot("inventory" %in% ls(), @@ -191,16 +185,16 @@ inv_load_checksums <- function(inventory, path) { } -#' Load DOIs from a text file into the Inventory. +#' Load DOIs from a text file into the inventory #' -#' @param path Location of a text file with DOIs and file paths. (character) -#' @param inventory (data.frame) An inventory. +#' Load DOIs from a text file into the inventory. #' -#' @return (data.frame) The modified Inventory. +#' @param path (character) Location of a text file with DOIs and file paths. +#' @param inventory (data.frame) An inventory. #' -#' @export +#' @return (data.frame) The modified inventory. #' -#' @examples +#' @noRd inv_load_dois <- function(inventory, path) { stopifnot(file.exists(path)) stopifnot(is.data.frame(inventory), @@ -224,18 +218,18 @@ inv_load_dois <- function(inventory, path) { inventory } -#' Load identifiers into the inventory file(s) from a text file. This function -#' removes the column 'identifier' from inventory before doing a + +#' Load identifiers into the inventory file(s) from a text file +#' +#' This function removes the column 'identifier' from inventory before doing a #' left join. #' -#' @param path (character) Path(s) to files containing identifiers. +#' @param paths (character) Path(s) to files containing identifiers. #' @param inventory (data.frame) An inventory. #' #' @return (data.frame) An inventory. 
#' -#' @export -#' -#' @examples +#' @noRd inv_load_identifiers <- function(inventory, paths) { stopifnot(file.exists(path)) stopifnot(is.data.frame(inventory), @@ -265,16 +259,19 @@ inv_load_identifiers <- function(inventory, paths) { inventory } -#' Adds a set of extra columsn to the inventory that are useful for working + +#' Add a set of extra columns to the inventory +#' +#' Add a set of extra columns to the inventory that are useful for working #' with them. #' #' @param inventory (data.frame) An inventory. #' -#' @return An inventory (data.frame) +#' @return (data.frame) An inventory. #' -#' @export +#' @noRd inv_add_extra_columns <- function(inventory) { - stopifnot(class(inventory) == "data.frame", "file" %in% names(inventory)) + stopifnot(is(inventory, "data.frame"), "file" %in% names(inventory)) # Mark metadata files cat("Adding 'is_metadata' column.\n") @@ -358,17 +355,13 @@ inv_add_extra_columns <- function(inventory) { } - - - -#' Add a column for parent packages. +#' Add a column for parent packages #' -#' @param inventory (data.frame) An Inventory. +#' @param inventory (data.frame) An inventory. #' -#' @return inventory (data.frame) An Inventory. -#' @export +#' @return (data.frame) An inventory. #' -#' @examples +#' @noRd inv_add_parent_package_column <- function(inventory) { stopifnot(all(c("file", "package", "is_metadata", "depth") %in% names(inventory))) @@ -432,16 +425,12 @@ inv_add_parent_package_column <- function(inventory) { } - -#' Update an Inventory with a new Inventory. +#' Update an inventory with a new inventory #' -#' @param inventory (data.frame) The old Inventory. -#' @param new_state (data.frame) The new Inventory. +#' @param inventory (data.frame) The old inventory. +#' @param new_state (data.frame) The new inventory. 
#' -#' @return -#' @export -#' -#' @examples +#' @noRd inv_update <- function(inventory, new_state) { stopifnot(is.data.frame(inventory), is.data.frame(new_state), @@ -469,5 +458,3 @@ inv_update <- function(inventory, new_state) { inventory } - - diff --git a/R/marking.R b/R/marking.R index e1df919..fd7db07 100644 --- a/R/marking.R +++ b/R/marking.R @@ -1,11 +1,9 @@ -#' marking.R -#' Author: Bryce Mecum -#' -#' R commands for marking datasets before adding. +# Functions for marking datasets before adding -#' Divide packages and their files into themes. +#' Divide packages and their files into themes #' +#' @description #' Themes divide packages into groups based upon how the actions we will take #' to insert them. Packages are divided into one of three themes: #' @@ -25,18 +23,18 @@ #' All other packages not in the above themes. #' #' Note: Adds a 'theme' column to 'inventory'. -#' Note: Depeneds on the following columns: +#' Note: Depends on the following columns: #' #' - filename #' - package_nfiles #' #' -#' @param inventory (data.frame) An Inventory. +#' @param inventory (data.frame) An inventory. +#' @param nfiles_cutoff (integer) Number of cutoff files. #' -#' @return (data.frame) An Inventory. -#' @export +#' @return (data.frame) An inventory. #' -#' @examples +#' @noRd theme_packages <- function(inventory, nfiles_cutoff=100) { stopifnot(is.data.frame(inventory), "package_nfiles" %in% names(inventory)) @@ -77,4 +75,3 @@ theme_packages <- function(inventory, nfiles_cutoff=100) { inventory } - diff --git a/R/modify_metadata.R b/R/metadata.R similarity index 88% rename from R/modify_metadata.R rename to R/metadata.R index 870bcd9..7b87fdb 100644 --- a/R/modify_metadata.R +++ b/R/metadata.R @@ -1,24 +1,21 @@ -#' modify_metadata.R -#' Author: Bryce Mecum -#' -#' Functions related to fixing invalid ISO metadata. -#' -#' Some functions just test whether a validation issue is present. These are -#' prefixed with the text "test". 
Exactly what they are testing should be -#' described in the docstrings. -#' -#' Other functons fix the bad metadata in place (modifying the original file) -#' and these functions are prefixed with "fix_". Exactly what they are fixing -#' should be described in the docstrings. -#' -#' Example usage: -#' -#' # Find and fix documents in 'mydir' that have extra whitespace in their -#' # topicCategory element(s) -#' -#' the_files <- dir(mydir) -#' bad_enums <- the_files[which(sapply(the_files, test_has_bad_enum))] -# sapply(bad_enums, fix_bad_enums) +# Functions related to fixing invalid ISO metadata + +# Some functions just test whether a validation issue is present. These are +# prefixed with the text "test". Exactly what they are testing should be +# described in the docstrings. +# +# Other functions fix the bad metadata in place (modifying the original file) +# and these functions are prefixed with "fix_". Exactly what they are fixing +# should be described in the docstrings. +# +# Example usage: +# +# Find and fix documents in 'mydir' that have extra whitespace in their +# topicCategory element(s) +# +# the_files <- dir(mydir) +# bad_enums <- the_files[which(sapply(the_files, test_has_bad_enum))] +# sapply(bad_enums, fix_bad_enum) test_has_abstract <- function(path) { @@ -133,7 +130,7 @@ test_has_bad_enum <- function(path) { } -#' Fix a metadata record with a bad topicCategory. +#' Fix a metadata record with a bad topicCategory #' #' This is the case where the ISO schema says what's inside a #' gmd:MD_TopicCategoryCode element should match items from a controlled @@ -142,11 +139,9 @@ test_has_bad_enum <- function(path) { #' #' 'oceans' != ' oceans ' #' -#' @param path -#' -#' @return +#' @param path (character) A file path. #' -#' @examples +#' @noRd fix_bad_enum <- function(path) { stopifnot(file.exists(path)) @@ -185,11 +180,9 @@ fix_bad_enum <- function(path) { #' oceans #' #' -#' @param path +#' @param path (character) A file path.
#' -#' @return -#' -#' @examples +#' @noRd fix_bad_topic <- function(path) { stopifnot(file.exists(path)) @@ -279,7 +272,7 @@ fix_bad_topic <- function(path) { } -#' Uses XMLStarlet to pretty-print/beautify an XML document. +#' Use XMLStarlet to pretty-print an XML document #' #' This command just runs `xmlstarlet path > path`, doing a simple #' pretty-printing of the file located at `path`. @@ -294,12 +287,11 @@ fix_bad_topic <- function(path) { #' format` on the same file as you redirect to, you get a weird parse error from #' xmlstarlet. #' +#' @param path (character) A file path. #' -#' @param path Path to your file you want pretty-printed. (character) -#' -#' @return Returns the result of the `system` command (0 = success) +#' @return The result of the `system` command (0 = success). #' -#' @examples +#' @noRd pretty_print <- function(path) { stopifnot(file.exists(path), file.info(path)$size > 0) diff --git a/R/packaging.R b/R/packaging.R index 090c2dc..a13011c 100644 --- a/R/packaging.R +++ b/R/packaging.R @@ -1,19 +1,13 @@ -#' package.R -#' Author: Bryce Mecum -#' -#' Code related to inserting datasets as Data Packages. - +# Functions for inserting datasets as data packages -#' Insert a file from a single row of the Inventory. +#' Insert a file from a single row of the inventory #' -#' @param inventory (data.frame) An Inventory. +#' @param inventory (data.frame) An inventory. #' @param file (character) The fully-qualified relative path to the file. See examples. #' @param env (list) Optional. Specify an environment. #' -#' @export -#' -#' @examples +#' @noRd insert_file <- function(inventory, file, env=NULL) { validate_inventory(inventory) stopifnot(is.character(file), nchar(file) > 0, file %in% inventory$file) @@ -27,7 +21,7 @@ insert_file <- function(inventory, file, env=NULL) { validate_environment(env) if (is_token_expired(env$mn)) { - log_message("Token is expired. Returning un-modified inventory.") + message("Token is expired. 
Returning un-modified inventory.") return(inventory) } @@ -42,7 +36,7 @@ insert_file <- function(inventory, file, env=NULL) { identifier_scheme <- env$data_identifier_scheme } - log_message(paste0("Using identifier scheme ", identifier_scheme, ".")) + message(paste0("Using identifier scheme ", identifier_scheme, ".")) # Determine the PID to use inventory_file[1,"pid"] <- get_or_create_pid(inventory_file[1,], @@ -50,7 +44,7 @@ insert_file <- function(inventory, file, env=NULL) { scheme = identifier_scheme) if (is.na(inventory_file[1,"pid"])) { - log_message(paste0("PID was NA for file ", file, ".\n")) + message(paste0("PID was NA for file ", file, ".\n")) return(inventory_file) } @@ -61,7 +55,7 @@ insert_file <- function(inventory, file, env=NULL) { env$rights_holder) if (is.null(sysmeta)) { - log_message(paste0("System Metadata creation failed for file ", file, ".\n")) + message(paste0("System Metadata creation failed for file ", file, ".\n")) return(inventory_file) } @@ -69,11 +63,11 @@ insert_file <- function(inventory, file, env=NULL) { inventory_file[1,"created"] <- create_object(inventory_file[1,], sysmeta, env$base_path, - mn) + env$mn) } if (inventory_file[1,"created"] == FALSE) { - log_message(paste0("Object creation failed for file ", file, ".\n")) + message(paste0("Object creation failed for file ", file, ".\n")) return(inventory_file) } @@ -81,18 +75,15 @@ insert_file <- function(inventory, file, env=NULL) { } -#' Create a single package Data Package from files in the Inventory. +#' Create a single data package from files in the inventory #' -#' @param inventory (data.frame) An Inventory. +#' @param inventory (data.frame) An inventory. #' @param package (character) The package identifier. -#' @param child_pids (character) Resource Map PIDs for child Data Packages. #' @param env (list) Environment variables. #' -#' @return A list containing PIDs and whether objects were inserted. 
(list) -#' -#' @export +#' @return (list) A list containing PIDs and whether objects were inserted. #' -#' @examples +#' @noRd insert_package <- function(inventory, package, env=NULL) { validate_inventory(inventory) stopifnot(is.character(package), nchar(package) > 0, package %in% inventory$package) @@ -106,7 +97,7 @@ insert_package <- function(inventory, package, env=NULL) { validate_environment(env) if (is_token_expired(env$mn)) { - log_message("Token is expired. Returning un-modified inventory.") + message("Token is expired. Returning un-modified inventory.") return(inventory) } @@ -148,7 +139,7 @@ insert_package <- function(inventory, package, env=NULL) { scheme = env$metadata_identifier_scheme) if (is.na(files[files_idx_metadata,"pid"])) { - log_message(paste0("Metadata PID was NA for package ", package, ".\n")) + message(paste0("Metadata PID was NA for package ", package, ".\n")) return(files) } @@ -159,7 +150,7 @@ insert_package <- function(inventory, package, env=NULL) { env$rights_holder) if (is.null(metadata_sysmeta)) { - log_message(paste0("System Metadata creation failed for metadata object in package ", package, ".\n")) + message(paste0("System Metadata creation failed for metadata object in package ", package, ".\n")) return(files) } @@ -177,11 +168,11 @@ insert_package <- function(inventory, package, env=NULL) { } if (files[files_idx_metadata,"created"] == FALSE) { - log_message(paste0("Object creation failed for metadata object in package ", package, ".\n")) + message(paste0("Object creation failed for metadata object in package ", package, ".\n")) return(files) } } else { - log_message("Skipped creating metadata because it was already created.") + message("Skipped creating metadata because it was already created.") } # Insert data files if needed @@ -190,11 +181,11 @@ insert_package <- function(inventory, package, env=NULL) { for (data_idx in files_idx_data) { # Skip if already created if (files[data_idx,"created"] == TRUE) { - 
log_message(paste0("File ", files[data_idx,"filename"], " in package ", package, " already created. Moving on to the next data object.\n")) + message(paste0("File ", files[data_idx,"filename"], " in package ", package, " already created. Moving on to the next data object.\n")) next } - log_message(paste0("Processing data index ", data_idx, " in package ", package, "\n")) + message(paste0("Processing data index ", data_idx, " in package ", package, "\n")) # Determine the PID to use for the data files[data_idx,"pid"] <- get_or_create_pid(files[data_idx,], @@ -202,7 +193,7 @@ insert_package <- function(inventory, package, env=NULL) { scheme = env$data_identifier_scheme) if (is.na(files[data_idx,"pid"])) { - log_message(paste0("Data PID was NA for file ", files[data_idx,'filename'], " in package ", package, ". Stopping early.\n")) + message(paste0("Data PID was NA for file ", files[data_idx,'filename'], " in package ", package, ". Stopping early.\n")) return(files) } @@ -213,7 +204,7 @@ insert_package <- function(inventory, package, env=NULL) { env$rights_holder) if (is.null(metadata_sysmeta)) { - log_message(paste0("System Metadata creation failed for metadata object in package ", package, ".\n")) + message(paste0("System Metadata creation failed for metadata object in package ", package, ".\n")) return(files) } @@ -226,35 +217,35 @@ insert_package <- function(inventory, package, env=NULL) { } if (files[data_idx,"created"] == FALSE) { - log_message(paste0("Object creation failed for metadata object in package ", package, ".\n")) + message(paste0("Object creation failed for metadata object in package ", package, ".\n")) return(files) } } } else { - log_message("Skipped creating data files because they were all created.") + message("Skipped creating data files because they were all created.") } # At this point, all of the metadata and data should be created, let's check if (!all(is.character(files[,"pid"])) && !all(files[,"created"] == TRUE)) { - log_message(paste0("Not 
all files in package ", package, " have PIDs and are created. Skipping Resource Map creation.\n")) + message(paste0("Not all files in package ", package, " have PIDs and are created. Skipping Resource Map creation.\n")) return(files) } # Generate and create() the Resource Map - log_message(paste0("Generating resource map for package ", package, ".\n")) + message(paste0("Generating resource map for package ", package, ".\n")) resource_map_pid <- generate_resource_map_pid(files[files_idx_metadata,"pid"]) resource_map_filepath <- generate_resource_map(files[files_idx_metadata,"pid"], files[files_idx_data,"pid"], child_pids) - log_message(paste0("Resource map PID is ", resource_map_pid, " for package with metadata file ", files[files_idx_metadata,"file"], ".\n")) + message(paste0("Resource map PID is ", resource_map_pid, " for package with metadata file ", files[files_idx_metadata,"file"], ".\n")) resource_map_format_id <- "http://www.openarchives.org/ore/terms" resource_map_checksum <- digest::digest(resource_map_filepath, algo = "sha256") resource_map_size_bytes <- file.info(resource_map_filepath)$size resource_map_file_name <- paste0(stringr::str_replace_all(resource_map_pid, ":", "_"), ".xml") - log_message(paste0("Generating system metadata for resource map for package ", package, ".\n")) + message(paste0("Generating system metadata for resource map for package ", package, ".\n")) resource_map_sysmeta <- new("SystemMetadata", identifier = resource_map_pid, formatId = resource_map_format_id, @@ -271,7 +262,7 @@ insert_package <- function(inventory, package, env=NULL) { resource_map_sysmeta <- add_access_rules(resource_map_sysmeta) - log_message(paste0("Creating resource map for package ", package, ".\n")) + message(paste0("Creating resource map for package ", package, ".\n")) create_resource_map_response <- NULL create_resource_map_response <- tryCatch({ dataone::createObject(env$mn, @@ -280,8 +271,8 @@ insert_package <- function(inventory, package, env=NULL) { 
sysmeta = resource_map_sysmeta) }, error = function(e) { - log_message(paste0("Error encountered while calling create() on the Resource Map for package ", package, ".\n")) - log_message(e$message) + message(paste0("Error encountered while calling create() on the Resource Map for package ", package, ".\n")) + message(e$message) e } ) @@ -295,23 +286,25 @@ insert_package <- function(inventory, package, env=NULL) { files$resmap_created <- TRUE } - log_message(print(files)) + message(print(files)) return(files) } -#' Create a resource map RDF/XML file and save is to a temporary path. + +#' Create a resource map RDF/XML file and save it to a temporary path +#' #' This is a convenience wrapper around the constructor of the `ResourceMap` #' class from `DataPackage`. #' -#' @param metadata_pid (character) PID of the metadata Object. -#' @param data_pids (character) PID(s) of the data Objects. -#' @param child_pids (character) Optional. PID(s) of child Resource Maps. -#' @param other_statements (data.frame) Extra statements to add to the Resource Map. -#' @param resource_map_pid +#' @param metadata_pid (character) PID of the metadata object. +#' @param data_pids (character) PID(s) of the data objects. +#' @param child_pids (character) Optional. PID(s) of child resource maps. +#' @param other_statements (data.frame) Extra statements to add to the resource map. +#' @param resource_map_pid (character) The PID of a resource map. #' @param resolve_base (character) Optional. The resolve service base URL. #' -#' @return Absolute path to the Resource Map on disk (character) +#' @return (character) Absolute path to the resource map on disk.
#' #' @export #' @@ -323,11 +316,11 @@ insert_package <- function(inventory, package, env=NULL) { #' object="http://example.com/bar")) #' } generate_resource_map <- function(metadata_pid, - data_pids=NULL, - child_pids=NULL, - other_statements=NULL, - resolve_base="https://cn.dataone.org/cn/v2/resolve", - resource_map_pid=NULL) { + data_pids = NULL, + child_pids = NULL, + other_statements = NULL, + resolve_base = "https://cn.dataone.org/cn/v2/resolve", + resource_map_pid = NULL) { stopifnot(length(metadata_pid) == 1) # Generate a PID if needed @@ -443,11 +436,15 @@ generate_resource_map <- function(metadata_pid, } if (length(setdiff(names(relationships), names(other_statements))) != 0) { - warning("The column names of the relationships and other_statements data frames do not match: ", paste(names(relationships), collapse =", "), " vs. ", paste(names(other_statements), collapse = ", "), ".") + warning("The column names of the relationships and other_statements data frames do not match: ", paste(names(relationships), collapse = ", "), " vs. 
", paste(names(other_statements), collapse = ", "), ".") } message("Adding ", nrow(other_statements), " custom statement(s) to the Resource Map.") + # Add an NA dataTypeURI to all the statements so the subsequent rbind works + # This is fine because they're all URIs and they don't need a datType + relationships$dataTypeURI <- NA + relationships <- rbind(relationships, other_statements) } @@ -461,8 +458,6 @@ generate_resource_map <- function(metadata_pid, resource_map <- new("ResourceMap", id = resource_map_pid) - message("Generating resource map with pids ", paste0(head(unlist(c(metadata_pid, data_pids, child_pids)), n = 10), collapse = ", "), ".") - resource_map <- datapack::createFromTriples(resource_map, relations = relationships, identifiers = unlist(c(metadata_pid, data_pids, child_pids)), @@ -483,14 +478,12 @@ generate_resource_map <- function(metadata_pid, outfilepath } -#' Generate a PID for a new resource map by appending "resource_map_" to it. -#' -#' @param metadata_pid + +#' Generate a PID for a new resource map by appending "resource_map_" to it #' -#' @return -#' @export +#' @param metadata_pid (character) A metadata PID. #' -#' @examples +#' @noRd generate_resource_map_pid <- function(metadata_pid) { stopifnot(is.character(metadata_pid), nchar(metadata_pid) > 0) @@ -502,17 +495,17 @@ generate_resource_map_pid <- function(metadata_pid) { paste0("resource_map_", metadata_pid) } -#' Get the already-minted PID from the inventory or mint a new one. + +#' Get the already-minted PID from the inventory or mint a new one #' #' @param file (data.frame) A single row from the inventory. #' @param mn (MNode) The Member Node that will mint the new PID, if needed. #' @param scheme (character) The identifier scheme to use. #' -#' @return The identifier (character) -#' @export +#' @return (character) The PID. 
#' -#' @examples -get_or_create_pid <- function(file, mn, scheme="UUID") { +#' @noRd +get_or_create_pid <- function(file, mn, scheme = "UUID") { stopifnot(is.data.frame(file), nrow(file) == 1, "pid" %in% names(file)) @@ -522,11 +515,11 @@ get_or_create_pid <- function(file, mn, scheme="UUID") { # Check if the existing PID is a valid one if (!is.na(pid) && is.character(pid) && nchar(pid) > 0) { - log_message(paste0("Using existing PID of ", pid, "\n")) + message(paste0("Using existing PID of ", pid, "\n")) return(pid) } - log_message(paste0("Minting new PID with scheme ", scheme, "\n")) + message(paste0("Minting new PID with scheme ", scheme, "\n")) if (scheme == "UUID") { pid <- paste0("urn:uuid:", uuid::UUIDgenerate()) @@ -538,8 +531,8 @@ get_or_create_pid <- function(file, mn, scheme="UUID") { dataone::generateIdentifier(mn, scheme) }, error = function(e) { - log_message(paste0("Error generating identifier for file ", file[1,"file"], ".\n")) - log_message(e$message) + message(paste0("Error generating identifier for file ", file[1,"file"], ".\n")) + message(e$message) e } ) @@ -553,21 +546,21 @@ get_or_create_pid <- function(file, mn, scheme="UUID") { pid } -#' Create a sysmeta object. + +#' Create a sysmeta object #' #' This is a wrapper function around the constructor for a -#' dataone::SystemMetadata object. +#' SystemMetadata object. #' #' @param file (data.frame) A single row from the inventory. -#' @param base_path (character) The path prefix to use with the contents of `file[1,"filename]` that -#' will be used to locate the file on disk. +#' @param base_path (character) The path prefix to use with the contents of `file[1,"filename"]` that +#' will be used to locate the file on disk. #' @param submitter (character) The submitter DN string for the object. #' @param rights_holder (character) The rights holder DN string for the object. #' -#' @return The sysmeta object (dataone::SystemMetadata) -#' @export +#' @return (SystemMetadata) The sysmeta object. 
#' -#' @examples +#' @noRd create_sysmeta <- function(file, base_path, submitter, rights_holder) { stopifnot(is.data.frame(file), nrow(file) == 1) @@ -613,8 +606,8 @@ create_sysmeta <- function(file, base_path, submitter, rights_holder) { }, error = function(e) { - log_message(paste0("Error generated during the call to create_sysmeta() for the metadata file ", file[1,"file"], "\n")) - log_message(e$message) + message(paste0("Error generated during the call to create_sysmeta() for the metadata file ", file[1,"file"], "\n")) + message(e$message) e } ) @@ -628,29 +621,27 @@ create_sysmeta <- function(file, base_path, submitter, rights_holder) { } -#' Create an object from a row of the inventory. +#' Create an object from a row of the inventory #' -#' @param file (data.frame)A row from the inventory. +#' @param file (data.frame) A row from the inventory. #' @param sysmeta (SystemMetadata) The file's sysmeta. -#' @param base_path (character) -#' @param mn (MNode) -#' -#' @return -#' @export +#' @param base_path (character) Base path, to be appended to the 'file' +#' column to find the file to upload. +#' @param env (list) An environment. 
#' -#' @examples -create_object <- function(file, sysmeta, base_path, mn) { +#' @noRd +create_object <- function(file, sysmeta, base_path, env) { stopifnot(is.data.frame(file), nrow(file) == 1, "pid" %in% names(file), "file" %in% names(file)) - stopifnot(class(sysmeta) == "SystemMetadata") + stopifnot(is(sysmeta, "SystemMetadata")) stopifnot(is.character(base_path), nchar(base_path) > 0) - stopifnot(class(env$mn) == "MNode") + stopifnot(is(env$mn, "MNode")) # Set the return value to FALSE by default result <- FALSE @@ -673,8 +664,8 @@ create_object <- function(file, sysmeta, base_path, mn) { sysmeta = sysmeta) }, error = function(e) { - log_message(paste0("Error generated during the call to MNStorage.create() for the metadata file ", file[1,"file"], "\n")) - log_message(e$message) + message(paste0("Error generated during the call to MNStorage.create() for the metadata file ", file[1,"file"], "\n")) + message(e$message) e }) @@ -689,28 +680,25 @@ create_object <- function(file, sysmeta, base_path, mn) { # Print out the insert rate time_diff_sec <- round(as.numeric(Sys.time() - before_time, "secs"), 2) mb_per_s <- round(file_size_mb / time_diff_sec, 2) - log_message(paste0("Inserted ", file_size_mb, " MB in ", time_diff_sec, " s (", mb_per_s, " MB/s)\n")) + message(paste0("Inserted ", file_size_mb, " MB in ", time_diff_sec, " s (", mb_per_s, " MB/s)\n")) if (is.character(response) && nchar(response) > 0) { result <- TRUE - log_message(paste0("Successfully created object with PID ", response, " for file ", file[1,"file"], ".\n")) + message(paste0("Successfully created object with PID ", response, " for file ", file[1,"file"], ".\n")) } else { result <- FALSE - log_message(paste0("Failed to created object with PID ", response, " for file ", file[1,"file"], ".\n")) + message(paste0("Failed to created object with PID ", response, " for file ", file[1,"file"], ".\n")) } result } -#' Validate an Inventory. 
-#' -#' @param inventory +#' Validate an inventory #' -#' @return -#' @export +#' @param inventory (data.frame) An inventory. #' -#' @examples +#' @noRd validate_inventory <- function(inventory) { stopifnot(is.data.frame(inventory), nrow(inventory) > 0, @@ -725,14 +713,12 @@ validate_inventory <- function(inventory) { "ready") %in% names(inventory))) } -#' Validate an environment. -#' -#' @param env + +#' Validate an environment #' -#' @return -#' @export +#' @param env (list) An environment. #' -#' @examples +#' @noRd validate_environment <- function(env) { env_default_components <- c("base_path", "alternate_path", @@ -741,7 +727,7 @@ validate_environment <- function(env) { "mn_base_url", "submitter", "rights_holder") - stopifnot(class(env) == "list", + stopifnot(is(env, "list"), length(env) > 0) stopifnot(!is.null(env), length(env) > 0) stopifnot(all(env_default_components %in% names(env))) @@ -751,15 +737,12 @@ validate_environment <- function(env) { } -#' Calculate a set of child PIDs for a given package. +#' Calculate a set of child PIDs for the given package #' -#' @param inventory -#' @param package -#' -#' @return -#' @export +#' @param inventory (data.frame) An inventory. +#' @param package (character) The package identifier. #' -#' @examples +#' @noRd determine_child_pids <- function(inventory, package) { stopifnot(all(c("package", "parent_package", "is_metadata") %in% names(inventory))) @@ -778,8 +761,9 @@ determine_child_pids <- function(inventory, package) { } -#' Update a package with modified metadata. +#' Update a package with modified metadata #' +#' @description #' The modified metadata should be set in the `env` variable. For example, if #' your original metadata is: #' @@ -801,13 +785,13 @@ determine_child_pids <- function(inventory, package) { #' exist on the Member Node before doing their work and will call createObject() #' instead of updateObject() if the object didn't already exist.
#' -#' @param inventory (data.frame) -#' @param package (character) +#' @param inventory (data.frame) An inventory. +#' @param package (character) The package identifier. +#' @param env (list) Environment. #' -#' @return TRUE or FALSE depending on sucess (logical) -#' @export +#' @return (logical) #' -#' @examples +#' @noRd update_package <- function(inventory, package, env = NULL) { @@ -830,7 +814,7 @@ update_package <- function(inventory, # Check the token if (is_token_expired(env$mn)) { - log_message("Token is expired. Returning un-modified inventory.") + message("Token is expired. Returning un-modified inventory.") return(package_files) } @@ -839,28 +823,28 @@ update_package <- function(inventory, data_file_idx <- which(package_files$is_metadata == FALSE) stopifnot(length(metadata_file_idx) == 1) - log_message(paste0("Updating package ", package, "\n")) + message(paste0("Updating package ", package, "\n")) # Find the converted EML documente if (!file.exists(env$alternate_path)) { - log_message(paste0("Alternate path location of ", env$alternate_path, " does not exist. Returning.\n")) + message(paste0("Alternate path location of ", env$alternate_path, " does not exist.
Returning.\n")) return(package_files) } eml_file_path <- path_join(c(env$alternate_path, package_files[metadata_file_idx,"file"])) if (!file.exists(eml_file_path)) { - log_message(paste0("EML file not found at path ", eml_file_path, ".\n")) + message(paste0("EML file not found at path ", eml_file_path, ".\n")) return(package_files) } - log_message(paste0("Converted document is at ", eml_file_path, "\n")) + message(paste0("Converted document is at ", eml_file_path, "\n")) # Get a new PID and replace the packageId new_pid <- package_files[metadata_file_idx,"pid"] old_pid <- package_files[metadata_file_idx,"pid_old"] - log_message(paste0("Updating object with old PID ", old_pid, " with new PID ", new_pid, ".\n")) + message(paste0("Updating object with old PID ", old_pid, " with new PID ", new_pid, ".\n")) stopifnot(!is.na(new_pid), is.character(new_pid), @@ -878,12 +862,12 @@ update_package <- function(inventory, # Call MNStorage.update on the metadata object # Does this PID even exist? Stop now if it doesn't. if (!object_exists(env$mn_base_url, old_pid)) { - log_message(paste0("Object with PID ", old_pid, " not found. Returning package.\n")) + message(paste0("Object with PID ", old_pid, " not found. 
Returning package.\n")) return(package_files) } # Update the object if it doesn't exist on the MN - log_message(paste0("Checking if metadata object with pid ", new_pid, " already exists.\n")) + message(paste0("Checking if metadata object with pid ", new_pid, " already exists.\n")) if (!object_exists(env$mn_base_url, new_pid)) { sysmeta <- new("SystemMetadata", @@ -910,8 +894,8 @@ update_package <- function(inventory, sysmeta = sysmeta) }, error = function(e) { - log_message(paste0("Error produced during call to updateObject for metadata ", package_files[metadata_file_idx,"file"], " in package ", package, "\n")) - log_message(e) + message(paste0("Error produced during call to updateObject for metadata ", package_files[metadata_file_idx,"file"], " in package ", package, "\n")) + message(e) e }) @@ -923,10 +907,10 @@ update_package <- function(inventory, # Set the updated flag to TRUE package_files$updated <- TRUE - log_message(paste0("Inserted updated metadata object for package ", package, "\n")) + message(paste0("Inserted updated metadata object for package ", package, "\n")) } else { - log_message(paste0("Metadata object already exists. Moving on to the resource map.\n")) + message(paste0("Metadata object already exists. Moving on to the resource map.\n")) } @@ -973,12 +957,12 @@ update_package <- function(inventory, # Check if the new resource map already exists if (object_exists(env$mn_base_url, resource_map_pid)) { - log_message(paste0("The new resource map with PID ", resource_map_pid, " already exists. Finishing up.\n")) + message(paste0("The new resource map with PID ", resource_map_pid, " already exists. Finishing up.\n")) } else { # Now check if the OLD resource map exists if (!object_exists(env$mn_base_url, old_resmap_pid)) { - log_message(paste0("Old resource map with PID ", resource_map_pid, " doesn't exist. Using createObject instead of updateObject.\n")) + message(paste0("Old resource map with PID ", resource_map_pid, " doesn't exist. 
Using createObject instead of updateObject.\n")) create_response <- tryCatch({ dataone::createObject(x = env$mn, @@ -987,17 +971,17 @@ update_package <- function(inventory, sysmeta = resource_map_sysmeta) }, error = function(e) { - log_message(paste0("Error produced during call to createObject for resource map ", resource_map_pid, " in package ", package, "\n")) - log_message(e) + message(paste0("Error produced during call to createObject for resource map ", resource_map_pid, " in package ", package, "\n")) + message(e) e }) if (inherits(create_response, "error")) { - log_message("There was an error calling createObject. Returning package files as is.\n") + message("There was an error calling createObject. Returning package files as is.\n") return(package_files) } - log_message(create_response) + message(create_response) } else { # Update the old resource map update_response <- tryCatch({ @@ -1008,17 +992,17 @@ update_package <- function(inventory, sysmeta = resource_map_sysmeta) }, error = function(e) { - log_message(paste0("Error produced during call to updateObject for resource map ", package_files[metadata_file_idx,"file"], " in package ", package, "\n")) - log_message(e) + message(paste0("Error produced during call to updateObject for resource map ", package_files[metadata_file_idx,"file"], " in package ", package, "\n")) + message(e) e }) if (inherits(update_response, "error")) { - log_message("There was an error calling updateObject Returning package files as is.\n") + message("There was an error calling updateObject Returning package files as is.\n") return(package_files) } - log_message(update_response) + message(update_response) } } @@ -1026,117 +1010,60 @@ update_package <- function(inventory, } -#' Parse a Resource Map into a data.frame +#' Parse a resource map into a data.frame +#' +#' Parse a resource map into a data.frame. 
#' -#' @param path (character) Path to the resource map (an RDF/XML file) +#' @param path (character) Path to the resource map (an RDF/XML file). +#' +#' @return (data.frame) The statements in the resource map. #' -#' @return (data.frame) The statements in the Resource Map #' @export #' #' @examples +#'\dontrun{ +#'# Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' rm_pid <- "resource_map_urn:uuid:6b2e5753-4a94-4e6f-971c-36420a446ecb" +#' +#' # Write resource map to file +#' writeBin(getObject(mn, rm_pid), "~/Documents/resource_map.rdf") +#' df <- parse_resource_map("~/Documents/resource_map.rdf") +#' } parse_resource_map <- function(path) { stopifnot(file.exists(path)) - world <- new("World") - storage <- new("Storage", - world, - "hashes", - name = "", - options = "hash-type='memory'") - model <- new("Model", world, storage, options = "") - parser <- new("Parser", world) - - redland::parseFileIntoModel(parser, world, path, model) - - query <- new("Query", - world, - "select ?s ?p ?o where { ?s ?p ?o }", - base_uri = NULL, - query_language = "sparql", - query_uri = NULL) - - queryResult <- redland::executeQuery(query, model) - - statements <- data.frame() - - while(!is.null(result <- redland::getNextResult(queryResult))) { - statements <- rbind(statements, - data.frame(subject = result$s, - predicate = result$p, - object = result$o, - stringsAsFactors = FALSE)) - } - - # Remove < and > around URIs. 
We do this because redland needs them to be - # without those characters or it complains about being unable to convert into - # a qname - statements$subject <- stringr::str_replace_all(statements$subject, "^[<]", "") - statements$predicate <- stringr::str_replace_all(statements$predicate, "^[<]", "") - statements$object <- stringr::str_replace_all(statements$object, "^[<]", "") - statements$subject <- stringr::str_replace_all(statements$subject, "[>]$", "") - statements$predicate <- stringr::str_replace_all(statements$predicate, "[>]$", "") - statements$object <- stringr::str_replace_all(statements$object, "[>]$", "") - - statements + rm <- new("ResourceMap") + datapack::parseRDF(rm, path) + datapack::getTriples(rm) } #' Filter statements related to packaging #' +#' This is intended to be called after [datapack::getTriples()] has been called +#' on a resource map. +#' #' This function was written specifically for the case of updating a resource #' map while preserving any extra statements that have been added such as PROV #' statements. Statements are filtered according to these rules: #' -#' 1. If the subject or object is the ore:ResourceMap resource -#' 2. If the subject or object is the ore:Aggregation resource -#' 3. If the predicate is cito:documents or cito:isDocumentedBy -#' 4. Once filters 1-3 have been executed, any remaining triples are considered -#' for removal if they look like dangling dc:identifier statements -#' -#' The consequence of filter 4 is that dc:identifier statements are left in if -#' they are still in use by another statement +#' @param statements (data.frame) A set of statements to be filtered. #' -#' @param statements (data.frame) A set of Statements to be filtered -#' -#' @return (data.frame) The filtered Statements -#' @export +#' @return (data.frame) The filtered statements. 
#' -#' @examples +#' @noRd filter_packaging_statements <- function(statements) { stopifnot(is.data.frame(statements)) if (nrow(statements) == 0) return(statements) - # Collect URIs we're going to use to filter by - resource_map_uri <- statements[grepl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", statements$predicate) & grepl("http://www.openarchives.org/ore/terms/ResourceMap", statements$object),"subject"] - aggregation_uri <- statements[grepl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", statements$predicate) & grepl("http://www.openarchives.org/ore/terms/Aggregation", statements$object),"subject"] - - # Filter statements by subject - statements <- statements[!(statements$subject %in% c(resource_map_uri, aggregation_uri)),] - - # Filter statements by object - statements <- statements[!(statements$object %in% c(resource_map_uri, aggregation_uri)),] - # Filter cito:documents / cito:isDocumentedBy statements statements <- statements[!(statements$predicate == "http://purl.org/spar/cito/documents"),] statements <- statements[!(statements$predicate == "http://purl.org/spar/cito/isDocumentedBy"),] - - # If this is a simple package without extra statements, then we should just be - # left with some dc:identifier statements left over. 
Here we try to detect - # that case by collecting the unique subjects taking part in dc:identifier - # statements and filtering statements about subjects with only one statement - # about them - - dc_identifiers <- unique(statements[statements$predicate == "http://purl.org/dc/terms/identifier", "subject"]) - - for (identifier in dc_identifiers) { - if (nrow(statements[statements$subject == identifier | statements$object == identifier,]) == 1) { - statements <- statements[!(statements$subject == identifier | statements$object == identifier),] - } - } - - # Remove introduced by the second filter statement - statements <- statements[complete.cases(statements),] + statements <- statements[!(statements$predicate == "http://purl.org/dc/terms/identifier"),] + statements <- statements[!((statements$predicate == "http://xmlns.com/foaf/0.1/name" & statements$object == "DataONE R Client")),] statements } - diff --git a/R/quality.R b/R/quality.R index db21541..172a205 100644 --- a/R/quality.R +++ b/R/quality.R @@ -1,18 +1,27 @@ -#' Score a metadata document against a MetaDIG Suite +# Functions related to metadata quality + + +#' Score a metadata document against a MetaDIG suite +#' +#' This function scores a metadata document against a MetaDIG suite. +#' The default suite is for the Arctic Data Center. +#' +#' @param document (eml/character) Either an EML object or path to a file on disk. +#' @param suite_id (character) Specify a suite ID. Should be one of <https://quality.nceas.ucsb.edu/quality/suites>. #' -#' @param document (eml or character) Either an EML object or path to a file on disk. -#' @param suite_id (character) Optional. Specificy a suite ID. Should be one of https://quality.nceas.ucsb.edu/quality/suites +#' @return (data.frame) A sorted data.frame of check results. 
#' -#' @return (data.frame) A sorted table of Check results #' @export #' #' @examples +#' \dontrun{ #' # Check an EML document you are authoring #' library(EML) -#' mdq_run(new("eml)) +#' mdq_run(new("eml")) #' #' # Check an EML document that is saved to disk #' mdq_run(system.file("examples", "example-eml-2.1.1.xml", package = "EML")) +#' } mdq_run <- function(document, suite_id = "arctic.data.center.suite.1") { if (is(document, "eml")) { metadata_path <- tempfile() diff --git a/R/sysdata.rda b/R/sysdata.rda deleted file mode 100644 index acb96fc..0000000 Binary files a/R/sysdata.rda and /dev/null differ diff --git a/R/sysmeta.R b/R/sysmeta.R index 09d64b6..47018b0 100644 --- a/R/sysmeta.R +++ b/R/sysmeta.R @@ -1,103 +1,23 @@ -#' sysmeta.R -#' -#' Utility functions for modifying System Metadata objects. - - -#' Add access rules to the sysmeta object -#' -#' This is a function because I add a set of standard set of access rules to -#' every object and the access rules don't differ across objects. -#' -#' @param sysmeta (SystemMetadata) The SystemMetadata to add rules to. -#' -#' @return The modified SystemMetadata object -#' @export -#' -#' @examples -add_access_rules <- function(sysmeta) { - if (!inherits(sysmeta, "SystemMetadata")) { - stop(paste0("An object of class ", class(sysmeta), " was passed in. 
Returning unmodified object.\n")) - } - - # Add myself explicitly as changePermission/write so I can update objects - # in the dev environment - if (env_get() == "development") { - sysmeta <- datapack::addAccessRule(sysmeta, env_load(skip_mn = TRUE)$submitter, "changePermission") - } - - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "read") - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "write") - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "changePermission") - - sysmeta -} - - -#' Remove all public read access rules from a System Metadata document -#' -#' @param sysmeta (SystemMetadata) The System Metadata document. to change. -#' -#' @return (SystemMetadata) The potentially modified System Metadata document. -#' -#' @examples -#' library(datapack) -#' sm <- new("SystemMetadata) -#' sm <- addAccessRule(sm, "public", "read") -#' sm@accessPolicy -#' sm <- remove_public_access(sm) -#' sm@accessPolicy -remove_public_access <- function(sysmeta) { - if (!inherits(sysmeta, "SystemMetadata")) { - stop(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) - } +# Utility functions for modifying System Metadata objects - sysmeta@accessPolicy <- sysmeta@accessPolicy[!(grepl("public", sysmeta@accessPolicy$subject) & grepl("read", sysmeta@accessPolicy$permission)),] - sysmeta -} - - -#' Adds access to the given System Metadata for the arctic-data-admins group -#' -#' @param sysmeta +#' Replace subjects in the accessPolicy section of a System Metadata entries #' -#' @return -#' @export -#' -#' @examples -add_admin_group_access <- function(sysmeta) { - if (!inherits(sysmeta, "SystemMetadata")) { - log_message(paste0("An object of class ", class(sysmeta), " was passed in. 
Returning unmodified object.\n")) - return(sysmeta) - } - - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "read") - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "write") - sysmeta <- datapack::addAccessRule(sysmeta, "CN=arctic-data-admins,DC=dataone,DC=org", "changePermission") - - sysmeta -} - - -#' Replace subjects in the accessPolicy section of a System Metadata entries. -#' -#' This function was written out to fix capitalization errors but in a set of +#' This function was written out to fix capitalization errors in a set of #' existing System Metadata entries but can be used to replace any subject. #' -#' #' @param sysmeta (SystemMetadata) The System Metadata object. #' @param from (character) The DN string to replace. -#' @param to (character) The DN string to put in place of `from`. +#' @param to (character) The DN string to put in place of 'from'. #' -#' @return The modified System Metadata (SystemMetadata) -#' @export +#' @return (SystemMetadata) The modified System Metadata. #' -#' @examples +#' @noRd replace_subject <- function(sysmeta, - from="cn=arctic-data-admins,dc=dataone,dc=org", - to="CN=arctic-data-admins,DC=dataone,DC=org") { + from = "cn=arctic-data-admins,dc=dataone,dc=org", + to = "CN=arctic-data-admins,DC=dataone,DC=org") { if (!inherits(sysmeta, "SystemMetadata")) { - log_message(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) + message(paste0("An object of class ", class(sysmeta), " was passed in. Returning unmodified object.\n")) return(sysmeta) } @@ -122,11 +42,10 @@ replace_subject <- function(sysmeta, #' @param sysmeta (SystemMetadata) The System Metadata object to clear the replication policy of. #' #' @return (SystemMetadata) The modified System Metadata object. 
-#' @export #' -#' @examples +#' @noRd clear_replication_policy <- function(sysmeta) { - if (!(class(sysmeta) == "SystemMetadata")) { + if (!(is(sysmeta, "SystemMetadata"))) { stop("First argument was not of class SystemMetadata.") } @@ -136,3 +55,146 @@ clear_replication_policy <- function(sysmeta) { sysmeta } + + +#' Get system metadata for all elements of a data package +#' +#' This function retrieves the system metadata for all elements of a data package and returns them as a list. +#' It is useful for inspecting system metadata for an entire data package and identifying changes where needed. +#' +#' @param mn (MNode) The Member Node to query. +#' @param resource_map_pid (character) The PID for a resource map. +#' @param nmax (numeric) The maximum number of system metadata objects to return. +#' @param child_packages (logical) If parent package, whether or not to include child packages. +#' +#' @return (list) A list of system metadata objects. +#' +#' @import dataone +#' @importFrom methods is +#' @importFrom methods new +#' +#' @export +#' +#' @examples +#'\dontrun{ +#' cn_staging <- CNode("STAGING") +#' adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC") +#' +#' rm_pid <- "resource_map_urn:uuid:..." 
+#' +#' all <- get_all_sysmeta(adc_test, rm_pid) +#' +#' # View in viewer to inspect +#' View(all) +#' +#' # Print specific elements to console +#' all[[1]]@rightsHolder +#' +#' # Create separate object +#' sysmeta_md <- all[[2]] +#' } +get_all_sysmeta <- function(mn, resource_map_pid, nmax = 1000, child_packages = FALSE) { + stopifnot(methods::is(mn, "MNode")) + stopifnot(is.character(resource_map_pid), nchar(resource_map_pid) > 0, length(resource_map_pid) == 1) + stopifnot(is_resource_map(mn, resource_map_pid)) + stopifnot(is.numeric(nmax), length(nmax) == 1 , nmax >= 0) + stopifnot(is.logical(child_packages), length(child_packages) == 1) + + query_params <- paste("identifier:", resource_map_pid, "+OR+resourceMap:", resource_map_pid, "", sep = "\"") + response <- dataone::query(mn, list(q = query_params, rows = as.character(nmax))) + + if (length(response) == 0) { + stop(paste0("No results were found when searching for a package with resource map '", resource_map_pid, + "'.\nThis could be caused by not having appropriate access to read the resource map.")) + } + + if (length(response) == nmax) { + warning(paste("Query returned the maximum number of objects. 
It is possible there are more to retrieve.", + "\nSpecify a larger number of objects with the 'nmax' argument.")) + } + + # Check if child package + if (response[[1]]$formatType == "RESOURCE" && !is.null(response[[1]]$resourceMap)) { + message("The data package with this resource map is a child package.") + } + # Check if parent package + if (any(unlist(lapply(response[2:length(response)], function(x) ifelse(x$formatType == "RESOURCE", TRUE, FALSE))))) { + message("The data package with this resource map is a parent package.") + if (child_packages == TRUE) { + children <- Filter(function(x) x$formatType == "RESOURCE", response[2:length(response)]) + children2 <- vector("list", length(children)) + for (i in seq_along(children)) { + child_resource_map_pid <- children[[i]]$identifier + query_params2 <- paste("identifier:", child_resource_map_pid, "+OR+resourceMap:", child_resource_map_pid, "", sep = "\"") + children2[[i]] <- dataone::query(mn, list(q = query_params2, rows = as.character(nmax))) + } + } + } + + # Translate fields from Solr query to formal class SystemMetadata + translate <- function(x) { + sysmeta <- methods::new("SystemMetadata") + + sysmeta@serialVersion <- sysmeta@serialVersion + sysmeta@identifier <- if (is.null(x$identifier)) {sysmeta@identifier} else {x$identifier} + sysmeta@formatId <- if (is.null(x$formatId)) {sysmeta@formatId} else {x$formatId} + sysmeta@size <- if (is.null(x$size)) {sysmeta@size} else {x$size} + sysmeta@checksum <- if (is.null(x$checksum)) {sysmeta@checksum} else {x$checksum} + sysmeta@checksumAlgorithm <- if (is.null(x$checksumAlgorithm)) {sysmeta@checksumAlgorithm} else {x$checksumAlgorithm} + sysmeta@submitter <- if (is.null(x$submitter)) {sysmeta@submitter} else {x$submitter} + sysmeta@rightsHolder <- if (is.null(x$rightsHolder)) {sysmeta@rightsHolder} else {x$rightsHolder} + read <- if (is.null(x$readPermission)) {} else {data.frame(subject = unlist(x$readPermission), + permission = "read")} + write <- if 
(is.null(x$writePermission)) {} else {data.frame(subject = unlist(x$writePermission), + permission = "write")} + change <- if (is.null(x$changePermission)) {} else {data.frame(subject = unlist(x$changePermission), + permission = "changePermission")} + sysmeta@accessPolicy <- rbind(read, write, change) + sysmeta@replicationAllowed <- if (is.null(x$replicationAllowed)) {sysmeta@replicationAllowed} else {x$replicationAllowed} + sysmeta@numberReplicas <- if (is.null(x$numberReplicas)) {sysmeta@numberReplicas} else {x$numberReplicas} + sysmeta@preferredNodes <- if (is.null(x$preferredReplicationMN)) {sysmeta@preferredNodes} else {x$preferredReplicationMN} + sysmeta@blockedNodes <- if (is.null(x$blockedReplicationMN)) {sysmeta@blockedNodes} else {x$blockedReplicationMN} + sysmeta@obsoletes <- if (is.null(x$obsoletes)) {sysmeta@obsoletes} else {x$obsoletes} + sysmeta@obsoletedBy <- if (is.null(x$obsoletedBy)) {sysmeta@obsoletedBy} else {x$obsoletedBy} + sysmeta@archived <- sysmeta@archived + sysmeta@dateUploaded <- if (is.null(x$dateUploaded)) {sysmeta@dateUploaded} else {as.character(x$dateUploaded)} + sysmeta@dateSysMetadataModified <- if (is.null(x$dateModified)) {sysmeta@dateSysMetadataModified} else {as.character(x$dateModified)} + sysmeta@originMemberNode <- if (is.null(x$datasource)) {sysmeta@originMemberNode} else {x$datasource} + sysmeta@authoritativeMemberNode <- if (is.null(x$authoritativeMN)) {sysmeta@authoritativeMemberNode} else {x$authoritativeMN} + sysmeta@seriesId <- if (is.null(x$seriesId)) {sysmeta@seriesId} else {x$seriesId} + sysmeta@mediaType <- if (is.null(x$mediaType)) {sysmeta@mediaType} else {x$mediaType} + sysmeta@fileName <- if (is.null(x$fileName)) {sysmeta@fileName} else {x$fileName} + sysmeta@mediaTypeProperty <- if (is.null(x$mediaTypeProperty)) {sysmeta@mediaTypeProperty} else {x$mediaTypeProperty} + + return(sysmeta) + } + + if (child_packages) { + other <- Filter(function(x) x$formatType != "RESOURCE", response[2:length(response)]) + 
response2 <- c(list(response[[1]]), other) + parent <- lapply(response2, translate) + names(parent) <- unlist(lapply(parent, function(x) {x@fileName})) + for (i in seq_along(parent)) { + if (is.na(names(parent)[i])) {names(parent)[i] <- paste0("missing_fileName", i)} + } + + child <- lapply(children2, function(x) {lapply(x, translate)}) + for (i in seq_along(child)) { + names(child[[i]]) <- unlist(lapply(child[[i]], function(x) {x@fileName})) + for (j in seq_along(child[[i]])) { + if (is.na(names(child[[i]])[j])) {names(child[[i]])[j] <- paste0("missing_fileName", j)} + } + } + names(child) <- paste0("child", seq_along(child)) + + all <- c(parent, child) + } else { + all <- lapply(response, translate) + names(all) <- unlist(lapply(all, function(x) {x@fileName})) + for (i in seq_along(all)) { + if (is.na(names(all)[i])) {names(all)[i] <- paste0("missing_fileName", i)} + } + } + + return(all) +} diff --git a/R/util.R b/R/util.R index f8de522..6526065 100644 --- a/R/util.R +++ b/R/util.R @@ -1,16 +1,15 @@ -#' util.R -#' Author: Bryce Mecum -#' -#' General utility functions that may be later merged into other files. +# Various utility functions -#' Extracts the local identifier for an ACADIS ISO metadata XML file. +#' Extract the local identifier for an ACADIS ISO metadata XML file #' #' @param type (character) A string, one of "gateway" or "field-projects". -#' @param file (character) A string, a connection, or raw vector (same as xml2::read_xml). +#' @param file (character) A string, connection, or raw vector +#' (same as [xml2::read_xml()]). #' -#' @returns The identifier string. (character) - +#' @return (character) The identifier string. 
+#' +#' @noRd extract_local_identifier <- function(type, file) { stopifnot(is.character(type), length(type) == 1) stopifnot(type %in% c("gateway", "field-projects")) @@ -43,118 +42,15 @@ extract_local_identifier <- function(type, file) { } -dataone_format_mappings <- list("avi" = "ideo/avi", - "bmp" = "image/bmp", - "bz2" = "application/x-bzip2", - "csv" = "text/csv", - "fasta" = "application/x-fasta", - "gif" = "image/gif", - "gz" = "application/x-gzip", - "html" = "text/html", - "jpg" = "image/jpeg", - "jpeg" = "image/jpeg", - "kml" = "application/vnd.google-earth.kml/xml", - "mp4" = "video/mp4", - "mpg" = "video/mpeg", - "mpeg" = "video/mpeg", - "n3" = "text/n3", - "nc" = "netCDF-3", - "pdf" = "application/pdf", - "png" = "image/png", - "ppt" = "application/vnd.ms-powerpoint", - "py" = "application/x-python", - "rdf" = "application/rdf/xml", - "tar" = "application/x-tar", - "tif" = "image/tiff", - "tiff" = "image/tiff", - "ttl" = "text/turtle", - "txt" = "text/plain", - "wmv" = "video/x-ms-wmv", - "xls" = "application/vnd.ms-excel", - "xlsx" = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xml" = "application/xml", - "zip" = "application/zip") - - -#' Guess format from filename for a vector of filenames. -#' -#' @param filenames (character) -#' -#' @return (character) DataOne format identifiers strings. -#' @export -#' -#' @examples -guess_format_id <- function(filenames) { - extensions <- tolower(tools::file_ext(filenames)) - filetypes <- vector(mode = "character", length = length(extensions)) - - for (i in seq_len(length(extensions))) { - extension <- extensions[i] - - if (extension %in% names(dataone_format_mappings)) { - filetypes[i] <- dataone_format_mappings[extension][[1]] - } else { - filetypes[i] <- "application/octet-stream" - } - } - - filetypes -} - - -#' Determine the DataONE format ID for the NetCDF file provided by path. -#' -#' @param path (character) Full or relative path to the file in question. 
-#' -#' @return (character) The DataONE format ID. -#' @export -#' -#' @examples -get_netcdf_format_id <- function(path) { - stopifnot(is.character(path), - nchar(path) > 0, - file.exists(path)) - - # Try to open the file, capturing errors - cdf_file <- try({ - ncdf4::nc_open(path) - }) - - # If we failed to open the file, we can assume it's not a valid NetCDF file - # and we just return application/octet-stream as the format ID - if (inherits(cdf_file, "try-error")) { - return("application/octet-stream") - } - - # Since we got this far, continue detecting the format - stopifnot("format" %in% names(cdf_file)) - format_string <- cdf_file$format - stopifnot(is.character(format_string), - nchar(format_string) > 0) - format_id = "" - - if (format_string == "NC_FORMAT_CLASSIC") { - format_id = "netCDF-3" - } else if (format_string == "NC_FORMAT_NETCDF4") { - format_id = "netCDF-4" - } else { - stop("Unknown NetCDF format discovered.") - } - - return(format_id) -} - - -#' Print a random dataset. +#' Print a random dataset #' #' @param inventory (data.frame) An inventory. #' @param theme (character) Optional. A package theme name. #' @param n (numeric) Optional. The number of files to show. #' -#' @return Nothing. -#' @export +#' @return `NULL` #' -#' @examples +#' @noRd show_random_dataset <- function(inventory, theme=NULL, n=10) { stopifnot(is.data.frame(inventory), all(c("file", "folder", "filename", "theme") %in% names(inventory))) @@ -178,7 +74,7 @@ show_random_dataset <- function(inventory, theme=NULL, n=10) { base_dir <- sampled_pkg[which(sampled_pkg$is_metadata == TRUE),"folder"] # startDebug - if (length(base_dir) != 0){ + if (length(base_dir) != 0) { browser() } # endDebug @@ -200,7 +96,7 @@ show_random_dataset <- function(inventory, theme=NULL, n=10) { } -#' Log a message to the console and to a logfile. 
+#' Log a message to the console and to a logfile #' #' Reads from the environment variable 'LOG_PATH' and uses the value set there #' to decide the location of the log file. If that envvar isn't set, it defaults @@ -208,10 +104,9 @@ show_random_dataset <- function(inventory, theme=NULL, n=10) { #' #' @param message (character) Your log message. #' -#' @return Nothing. -#' @export +#' @return `NULL` #' -#' @examples +#' @noRd log_message <- function(message=NULL) { if (is.null(message) || !is.character(message) || nchar(message) < 1) { invisible(return(FALSE)) @@ -238,18 +133,28 @@ log_message <- function(message=NULL) { } -#' Check if an object exists on a Member Node. +#' Check if an object exists on a Member Node #' #' This is a simple check for the HTTP status of a /meta/{PID} call on the -#' provided member node. +#' provided Member Node. #' -#' @param node (MNode|CNode) The Node to query. -#' @param pid (character) PID to check the existence of. +#' @param node (MNode) The Member Node to query. +#' @param pids (character) The PID(s) to check the existence of. #' #' @return (logical) Whether the object exists. +#' #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +#' "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +#' +#' object_exists(mn, pids) +#' } object_exists <- function(node, pids) { stopifnot(class(node) %in% c("MNode", "CNode"), is.character(pids)) @@ -264,7 +169,7 @@ object_exists <- function(node, pids) { e }) - if (inherits(sysmeta, "error") || class(sysmeta) != "SystemMetadata") { + if (inherits(sysmeta, "error") || !is(sysmeta, "SystemMetadata")) { result[i] <- FALSE } else { result[i] <- TRUE @@ -275,16 +180,22 @@ object_exists <- function(node, pids) { } -#' Convert and ISO document to EML using an XSLT. 
+#' Convert an ISO document to EML using an XSLT #' #' Leave style=NA if you want to use the default ISO-to-EML stylesheet. +#' #' @param path (character) Path to the file to convert. #' @param style (xslt) The XSLT object to be used for transformation. #' #' @return (character) Location of the converted file. +#' #' @export #' #' @examples +#'\dontrun{ +#'iso_path <- "~/Documents/ISO_metadata.xml" +#'eml_path <- convert_iso_to_eml(iso_path) +#'} convert_iso_to_eml <- function(path, style=NA) { # Load the XSLT if needed if (is.na(style)) { @@ -303,14 +214,18 @@ convert_iso_to_eml <- function(path, style=NA) { } -#' Extract the EML responsible-party blocks in a document, and parse the -#' surName field to create proper givenName/surName structure +#' Modify name structure for EML parties +#' +#' Extract the EML responsible-party blocks in a document and parse the +#' surName field to create proper givenName/surName structure. +#' +#' @param path (character) The path to the EML document to process. #' -#' @param path file path to the EML document to process (character) +#' @return (character) The path to the converted EML file. #' -#' @return path (character) Path to the converted EML file. #' @import XML -#' @export +#' +#' @noRd substitute_eml_party <- function(path) { # Read in the EML document doc = XML::xmlParse(path) @@ -339,15 +254,19 @@ substitute_eml_party <- function(path) { return(path) } + +#' Change EML name +#' #' Utility function to extract a name string from an XML individualName node, -#' parse it into tokens,and reformat the individualName with new children nodes +#' parse it into tokens, and reformat the individualName with new children nodes. #' -#' @param party the XML node containing a subclass of eml:ResponsibleParty +#' @param party The XML node containing a subclass of eml:ResponsibleParty. #' -#' @return the modified XML node +#' @return The modified XML node. 
#' #' @import XML -#' @export +#' +#' @noRd change_eml_name <- function(party) { # Check if there is an individualName element exists if (length(XML::getNodeSet(party, "./individualName")) == 0) { @@ -375,6 +294,12 @@ change_eml_name <- function(party) { # Replace commas with spaces user_name <- stringr::str_replace_all(user_name, ",", "") + + if (!requireNamespace("humaniformat")) { + stop("The package 'humaniformat' is required to run this function. ", + "Please install it.") + } + parsed_name <- humaniformat::parse_names(user_name) # Create the new node to hold the parts of the name @@ -409,23 +334,22 @@ change_eml_name <- function(party) { } +#' Replace EML packageId with value +#' #' Replace the EML 'packageId' attribute on the root element with a #' certain value. #' #' @param path (character) Path to the XML file to edit. #' @param replacement (character) The new value. #' -#' @return -#' @export -#' -#' @examples +#' @noRd replace_package_id <- function(path, replacement) { stopifnot(file.exists(path)) stopifnot(is.character(replacement), nchar(replacement) > 0) doc <- EML::read_eml(path) - stopifnot(class(doc) == "eml") + stopifnot(is(doc, "eml")) doc@packageId <- new("xml_attribute", replacement) doc@system <- new("xml_attribute", "arcticdata") @@ -435,15 +359,13 @@ replace_package_id <- function(path, replacement) { path } -#' Adds a string to the title element in the given file. + +#' Add a string to the title element in the given file #' #' @param path (character) Path to the XML file to edit. #' @param string (character) The new value. #' -#' @return -#' @export -#' -#' @examples +#' @noRd add_string_to_title <- function(path, string) { stopifnot(file.exists(path)) stopifnot(is.character(string), @@ -470,15 +392,14 @@ add_string_to_title <- function(path, string) { } -#' Add a set of additional identifiers to an EML document. +#' Add a set of additional identifiers to an EML document #' #' @param path (character) Path to the EML document. 
#' @param identifiers (character) Set of identifiers to add. #' #' @return (character) Path to the modified document. -#' @export #' -#' @examples +#' @noRd add_additional_identifiers <- function(path, identifiers) { stopifnot(is.character(path), nchar(path) > 0, @@ -502,17 +423,16 @@ add_additional_identifiers <- function(path, identifiers) { } -#' (Intelligently) join (possibly redudant) path parts together. +#' Intelligently join possibly redundant path parts together #' #' Joins path strings like "./" to "./my/dir" as "./my/dir" instead of as #' "././my/dir. #' #' @param path_parts (character) #' -#' @return (character)The joined path string. -#' @export +#' @return (character) The joined path string. #' -#' @examples +#' @noRd path_join <- function(path_parts=c("")) { result <- paste0(path_parts, collapse = "") @@ -532,57 +452,40 @@ path_join <- function(path_parts=c("")) { result } -#' Test whether an object is a particular format ID. -#' -#' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. -#' @param pids (character) -#' @param format_id (character) -#' -#' @return (logical) -#' @export -#' -#' @examples -is_format_id <- function(node, pids, format_id) { - stopifnot(class(node) %in% c("MNode", "CNode")) - stopifnot(all(is.character(pids)), - all(lengths(pids) > 0)) - stopifnot(is.character(format_id), - nchar(format_id) > 0) - result <- vector("logical", length(pids)) - - for (i in seq_along(pids)) { - result[i] <- dataone::getSystemMetadata(node, pids[i])@formatId == format_id - } - - result -} - -#' Determines whether the object with the given PID is a resource map. +#' Determine whether the object with the given PID is a resource map #' #' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. -#' @param pids (character) Vector of PIDs +#' @param pids (character) Vector of PIDs. 
#' -#' @return (logical) Whether or not the object(s) are resource maps -#' @export +#' @return (logical) Whether or not the object(s) are resource maps. #' -#' @examples +#' @noRd is_resource_map <- function(node, pids) { is_format_id(node, pids, "http://www.openarchives.org/ore/terms") } -#' Test whether the object is obsoleted by another object. +#' Test whether the object is obsoleted by another object #' #' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. #' @param pids (character) One or more PIDs to query against. #' #' @return (logical) Whether or not the object is obsoleted by another object. +#' #' @export #' #' @examples +#'\dontrun{ +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" +#' +#' is_obsolete(mn, pid) +#'} is_obsolete <- function(node, pids) { - stopifnot(class(node) == "MNode" || class(node) == "CNode") + stopifnot(is(node, "MNode") || is(node, "CNode")) stopifnot(is.character(pids)) response <- vector(mode = "logical", length = length(pids)) @@ -597,12 +500,11 @@ is_obsolete <- function(node, pids) { } -#' Returns the subject of the set dataone_test_token +#' Return the subject of the set dataone_test_token #' #' @return (character) The token subject. -#' @export #' -#' @examples +#' @noRd get_token_subject <- function() { info <- dataone::getTokenInfo(dataone::AuthenticationManager()) @@ -620,46 +522,48 @@ get_token_subject <- function() { } -#' Get the identifier from a DataONE response. +#' Get the identifier from a DataONE response #' -#' Example resposne: +#' Example response: #' #' #' urn:uuid:12aaf494-5840-434d-9cdb-c2597d58543e #' #' -#' @param dataone_response ("XMLInternalDocument" "XMLAbstractDocument") +#' @param dataone_response ("XMLInternalDocument"/"XMLAbstractDocument") #' #' @return (character) The PID. 
-#' @export #' -#' @examples +#' @noRd get_identifier <- function(dataone_response) { stopifnot("XMLInternalDocument" %in% class(dataone_response)) - XML::xmlValue(XML::getNodeSet(dataone_response, "//d1:identifier/text()", namespaces = c("d1"="http://ns.dataone.org/service/types/v1"))[[1]]) + XML::xmlValue(XML::getNodeSet(dataone_response, "//d1:identifier/text()", namespaces = c("d1" = "http://ns.dataone.org/service/types/v1"))[[1]]) } -#' Helper function to generate a new UUID PID. +#' Generate a new UUID PID +#' +#' Generate a new UUID PID. #' #' @return (character) A new UUID PID. +#' #' @export #' #' @examples +#' id <- new_uuid() new_uuid <- function() { paste0("urn:uuid:", uuid::UUIDgenerate()) } -#' Get the current package version. +#' Get the current package version #' #' This function parses the installed DESCRIPTION file to get the latest -#' version. +#' version of the package. #' #' @return (character) The current package version. -#' @export #' -#' @examples +#' @noRd get_current_version <- function() { desc_file <- file.path(system.file("DESCRIPTION", package = "arcticdatautils")) desc_lines <- readLines(desc_file) @@ -667,12 +571,11 @@ get_current_version <- function() { } -#' Use the GitHub API to find the latest release for the package. +#' Use the GitHub API to find the latest release for the package #' #' @return (character) The latest release. -#' @export #' -#' @examples +#' @noRd get_latest_release <- function() { req <- httr::GET("https://api.github.com/repos/NCEAS/arcticdatautils/releases") content <- httr::content(req) @@ -684,13 +587,12 @@ get_latest_release <- function() { } +#' Warn if package version is outdated +#' #' Warns if the currently-installed version of the package is not the same #' version as the latest release on GitHub. 
#' -#' @return -#' @export -#' -#' @examples +#' @noRd warn_current_version <- function() { current <- get_current_version() latest <- get_latest_release() @@ -701,15 +603,25 @@ warn_current_version <- function() { } +#' Get the PIDs of all versions of an object +#' #' Get the PIDs of all versions of an object. #' -#' @param node (MNode|CNode) The node to query. +#' @param node (MNode) The Member Node to query. #' @param pid (character) Any object in the chain. #' #' @return (character) A vector of PIDs in the chain, in order. +#' #' @export #' #' @examples +#'\dontrun{ +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" +#' +#' ids <- get_all_versions(mn, pid) +#' } get_all_versions <- function(node, pid) { stopifnot(class(node) %in% c("MNode", "CNode")) stopifnot(is.character(pid), @@ -753,22 +665,31 @@ get_all_versions <- function(node, pid) { } -#' Get a structured list of PIDs for the objects in a package. +#' Get a structured list of PIDs for the objects in a package #' -#' This is a wrapper function around `get_package_direct` which takes either -#' a resource map PID or a metadata PID as its `pid` argument. +#' Get a structured list of PIDs for the objects in a package, +#' including the resource map, metadata, and data objects. #' -#' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. -#' @param pid (character) The the metadata PID of the package. +#' @param node (MNode/CNode) The Coordinating/Member Node to run the query on. +#' @param pid (character) The resource map PID of the package. #' @param file_names (logical) Whether to return file names for all objects. #' @param rows (numeric) The number of rows to return in the query. This is only -#' useful to set if you are warned about the result set being truncated. +#' useful to set if you are warned about the result set being truncated. Defaults to 5000. 
#' #' @return (list) A structured list of the members of the package. +#' #' @export #' #' @examples -get_package <- function(node, pid, file_names=FALSE, rows=1000) { +#'\dontrun{ +#' #Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' pid <- "resource_map_urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" +#' +#' ids <- get_package(mn, pid) +#' } +get_package <- function(node, pid, file_names=FALSE, rows=5000) { stopifnot(is(node, "MNode") || is(node, "CNode")) stopifnot(is.character(pid), nchar(pid) > 0) @@ -779,7 +700,7 @@ get_package <- function(node, pid, file_names=FALSE, rows=1000) { resource_map_pids <- pid } else { warning(call. = FALSE, - paste0("The PID '", pid, "' is not for a Resource Map Object so the most likely candidate was found. This is usally fine! Specify a Resource Map PID instead to stop getting this warning.")) + paste0("The PID '", pid, "' is not for a Resource Map Object so the most likely candidate was found. This is usually fine! Specify a Resource Map PID instead to stop getting this warning.")) resource_map_pids <- find_newest_resource_map(node, pid) } @@ -803,18 +724,18 @@ get_package <- function(node, pid, file_names=FALSE, rows=1000) { } -#' Get a structured list of PIDs for the objects in a package. +#' Get a structured list of PIDs for the objects in a package #' -#' @param node (MNode|CNode) The Coordinating/Member Node to run the query on. +#' This function is used within [get_package()]. +#' +#' @param node (MNode/CNode) The Coordinating/Member Node to run the query on. #' @param pid (character) The the metadata PID of the package. #' @param file_names (logical) Whether to return file names for all objects. #' @param rows (numeric) The number of rows to return in the query. This is only -#' useful to set if you are warned about the result set being truncated. +#' useful to set if you are warned about the result set being truncated. Defaults to 5000. 
#' -#' @return -#' -#' @examples -get_package_direct <- function(node, pid, file_names=FALSE, rows = 1000) { +#' @noRd +get_package_direct <- function(node, pid, file_names = FALSE, rows = 5000) { stopifnot(is(node, "MNode") || is(node, "CNode")) stopifnot(is.character(pid), nchar(pid) > 0) @@ -880,18 +801,17 @@ get_package_direct <- function(node, pid, file_names=FALSE, rows = 1000) { response } -#' Get the resource map(s) for the given object. + +#' Get the resource map(s) for the given object #' -#' @param node (MNode|CNode) The Node to query. +#' @param node (MNode/CNode) The Member Node to query. #' @param pid (character) The object to get the resource map(s) for. -#' @param rows (numeric) Optional. The number of query results to return. This -#' shouldn't need to be modified and the default, 1000, is very likely to be -#' more than enough. +#' @param rows (numeric) Optional. The number of query results to return. +#' The default, 1000, is very likely to be more than enough. #' #' @return (character) The resource map(s) that contain `pid`. -#' @export #' -#' @examples +#' @noRd find_newest_resource_map <- function(node, pid, rows = 1000) { stopifnot(class(node) %in% c("MNode", "CNode")) stopifnot(is.character(pid), @@ -935,14 +855,17 @@ find_newest_resource_map <- function(node, pid, rows = 1000) { find_newest_object(node, all_resource_map_pids) } -#' Find the newest (by dateUploaded) object within a given set of objects. + +#' Find the newest object within the given set of objects #' -#' @param node (MNode | CNode) The node to query -#' @param identiifers (character) One or more identifiers +#' Find the newest object, based on dateUploaded, within the given set of objects. +#' +#' @param node (MNode/CNode) The Member Node to query. +#' @param identifiers (character) One or more identifiers. #' @param rows (numeric) Optional. Specify the size of the query result set. #' #' @return (character) The PID of the newest object. 
In the case of a tie (very -#' unlikely) the first element, in natural order, is returned. +#' unlikely) the first element, in natural order, is returned. #' #' @export #' @@ -977,18 +900,17 @@ find_newest_object <- function(node, identifiers, rows=1000) { } -#' Filters PIDs that are obsolete. +#' Filters PIDs that are obsolete #' #' Whether or not a PID is obsolete is determined by whether its "obsoletedBy" -#' property is set to another PID (TRUE) or is NA (FALSE). +#' property is set to another PID (`TRUE`) or is `NA` (`FALSE`). #' -#' @param node (MNode|CNode) The Node to query. +#' @param node (MNode|CNode) The Member Node to query. #' @param pids (character) PIDs to check the obsoletion state of. #' #' @return (character) PIDs that are not obsoleted by another PID. -#' @export #' -#' @examples +#' @noRd filter_obsolete_pids <- function(node, pids) { pids[is.na(sapply(pids, function(pid) { dataone::getSystemMetadata(node, pid)@obsoletedBy }, USE.NAMES = FALSE))] } @@ -996,11 +918,12 @@ filter_obsolete_pids <- function(node, pids) { #' Get an approximate list of the datasets in a user's profile #' -#' This function is intended to be (poorly) simulate what a user sees when they +#' This function is intended to (poorly) simulate what a user sees when they #' browse to their "My Data Sets" page (their #profile URL). It uses a similar -#' Solr to what Metacat UI uses to generate the list. The results of this -#' function may be the same as what's on the #profile page but may be missing -#' some of the user's datasets when: +#' Solr query to what Metacat UI uses to generate the list. +#' +#' The results of this function may be the same as what's on the #profile page +#' but may be missing some of the user's datasets when: #' #' - The user has any datasets in their #profile that the person running the #' query (you) can't \code{read}. 
This is rare on arcticdata.io but possible @@ -1011,10 +934,11 @@ filter_obsolete_pids <- function(node, pids) { #' #' @param mn (MNode) The Member Node to query. #' @param subject (character) The subject to find the datasets for. This is -#' likely going to be your ORCID, e.g. http://orcid.org.... +#' likely going to be an ORCID, e.g. http://orcid.org.... #' @param fields (character) A vector of Solr fields to return. #' -#' @return (data.frame) data.frame with the results. +#' @return (data.frame) A data.frame with the results. +#' #' @export #' #' @examples @@ -1026,6 +950,13 @@ filter_obsolete_pids <- function(node, pids) { #' #' // Get a custom set of fields #' view_profile(mn, me, "origin") +#' +#' # Set environment +#' cn <- CNode("STAGING2") +#' mn <- getMNode(cn,"urn:node:mnTestKNB") +#' +#' package_df <- view_profile(mn, "http://orcid.org/0000-0003-4703-1974", fields = c("title")) +#' #' } view_profile <- function(mn, subject, fields=c("identifier", "title")) { stopifnot(is(mn, "MNode")) @@ -1064,19 +995,22 @@ view_profile <- function(mn, subject, fields=c("identifier", "title")) { results } + #' Show the indexing status of a set of PIDs #' -#' @param mn (MNode) The Member Node to query -#' @param pids (character|list) One or more PIDs (or list of PIDs) +#' Show the indexing status of a set of PIDs. +#' +#' @param mn (MNode) The Member Node to query. +#' @param pids (character/list) One or more PIDs. +#' +#' @return `NULL` #' -#' @return Nothing #' @export #' #' @examples #' \dontrun{ #' # Create a package then check its indexing status #' library(dataone) -#' library(arcticdatautils) #' mn <- MNode(...) 
#' pkg <- create_dummy_package(mn) #' show_indexing_status(mn, pkg) @@ -1092,7 +1026,7 @@ show_indexing_status <- function(mn, pids) { stopifnot(is.character(pids), length(pids) > 0) - expected_pids <- unlist(pkg, use.names = FALSE) + expected_pids <- unlist(pids, use.names = FALSE) indexed_pids <- c() # Accumulates the PIDs we find in the index pb <- txtProgressBar(min = 0, max = length(expected_pids), style = 3) @@ -1124,3 +1058,38 @@ show_indexing_status <- function(mn, pids) { close(pb) } +#' Set public READ access on all versions of PIDs in data package. +#' +#' Set public READ access on all versions of PIDs in data package. +#' +#' @param mn (MNode) The Member Node to query. +#' @param resource_map_pid (character) The resource map identifier (PID). +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' cn_staging <- CNode('STAGING') +#' adc_test <- getMNode(cn_staging,'urn:node:mnTestARCTIC') +#' # Create a dummy package then create another version with 'publish_update()' +#' pkg <- create_dummy_package(adc_test) +#' remove_public_read(mn, unlist(pkg)) +#' pkg_v2 <- publish_update(adc_test, pkg$metadata, pkg$resource_map, pkg$data, public = FALSE) +#' # Set public read on all versions +#' set_public_read_all_versions(adc_test, pkg$resource_map) +#' } +set_public_read_all_versions <- function(mn, resource_map_pid) { + stopifnot(is(mn, 'MNode')) + stopifnot(is_token_set(mn)) + stopifnot(is.character(resource_map_pid)) + stopifnot(arcticdatautils:::is_resource_map(mn, resource_map_pid)) + + pids <- get_package(mn, resource_map_pid) %>% + unlist() + versions <- lapply(pids, get_all_versions, node = mn) %>% + unlist() %>% + unique() + set_public_read(mn, versions) + + return(invisible()) +} diff --git a/README.md b/README.md index 2925eda..b5e15a8 100644 --- a/README.md +++ b/README.md @@ -1,43 +1,66 @@ -# arcticadatautils +# arcticdatautils -The `articadatautils` R package contains code for: +[![Travis build 
status](https://travis-ci.org/NCEAS/arcticdatautils.svg?branch=master)](https://travis-ci.org/NCEAS/arcticdatautils) -- Inserting large numbers of files into Metacat -- High-level [rdataone](https://github.com/DataONEorg/rdataone) wrappers for -editing objects and Data Packages +The `arcticdatautils` package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: -Note: The package is intended to be used by NCEAS staff and may not make much sense to others. +- Inserting large numbers of files into a Metacat Member Node +- High-level [dataone](https://github.com/DataONEorg/rdataone) wrappers for working with Objects and Data Packages that streamline Arctic Data Center operations +Note: The package is intended to be used by NCEAS staff and may not make much sense to others. ## Installing -I recommend installing from the latest [release](https://github.com/NCEAS/arcticdatautils/releases) (aka tag) instead of from `master`. Install from release with: +We recommend installing from the latest [release](https://github.com/NCEAS/arcticdatautils/releases) (aka tag) instead of from `master`. Install the latest release with the [`remotes`](https://github.com/r-lib/remotes) package: -``` -devtools::install_github("NCEAS/arcticdatautils", ref = "{TAG_NAME_HERE}") +```r +remotes::install_github("nceas/arcticdatautils@*release") ``` If you're feeling adventurous, you can install from the bleeding edge: +```r +remotes::install_github("nceas/arcticdatautils") ``` -devtools::install_github("NCEAS/arcticdatautils") -``` - ## Contributing -Please submit suggestions or bugs as [Issues](https://github.com/NCEAS/arcticdatautils/issues). +- Please submit suggestions or bugs as [Issues](https://github.com/NCEAS/arcticdatautils/issues). 
+- Pull Requests (PRs) should target the `master` branch +- Before submitting a PR, please: + - Re-document and commit any `*.Rd` file changes + > `devtools::document()` + - R CMD CHECK and fix any issues related to your changes + > `devtools::check()` + - Run the tests and make sure they all pass + > `devtools::test()` + +## Support +- Explore the pkgdown site for documentation: https://nceas.github.io/arcticdatautils/ +- Please submit bugs or other comments as [Issues](https://github.com/NCEAS/arcticdatautils/issues) +- Maintainers of the package are @jeanetteclark and @jagoldstein ## Testing -Some tests are dependent on an authentication token being set and be skipped if one is not set. +Note: The test suite contains a set of tests that call out to a remote server and whether or not these tests are run depends on whether `is_token_set()` returns true which just checks whether the `dataone_test_token` option is set. -``` -# Skips tests that depend on a Metacat instance: +If you don't want to run integration tests: + +```r devtools::test() +``` + +If you *do* want to run integration tests: + +1. Visit [https://test.arcticdata.io](https://test.arcticdata.io) +2. Log in +3. Navigate to My Profile > Settings > Authentication Token +4. Click the "Token for DataONE R" tab +5. Copy the code snippet +6. 
Modify the first line in the snippet below: -# Set a token to run skipped tests: -options(dataone_test_token = "...") +```r +options(dataone_test_token = "{YOUR_TOKEN_HERE}") # <- Modify this line devtools::test() ``` diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..3b06c25 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +navbar: + right: + - icon: fa-github + href: https://github.com/NCEAS/arcticdatautils diff --git a/docs/LICENSE b/docs/LICENSE deleted file mode 100644 index 5e0fd33..0000000 --- a/docs/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, -and distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by -the copyright owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all -other entities that control, are controlled by, or are under common -control with that entity. For the purposes of this definition, -"control" means (i) the power, direct or indirect, to cause the -direction or management of such entity, whether by contract or -otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity -exercising permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, -including but not limited to software source code, documentation -source, and configuration files. - -"Object" form shall mean any form resulting from mechanical -transformation or translation of a Source form, including but -not limited to compiled object code, generated documentation, -and conversions to other media types. 
- -"Work" shall mean the work of authorship, whether in Source or -Object form, made available under the License, as indicated by a -copyright notice that is included in or attached to the work -(an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object -form, that is based on (or derived from) the Work and for which the -editorial revisions, annotations, elaborations, or other modifications -represent, as a whole, an original work of authorship. For the purposes -of this License, Derivative Works shall not include works that remain -separable from, or merely link (or bind by name) to the interfaces of, -the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including -the original version of the Work and any modifications or additions -to that Work or Derivative Works thereof, that is intentionally -submitted to Licensor for inclusion in the Work by the copyright owner -or by an individual or Legal Entity authorized to submit on behalf of -the copyright owner. For the purposes of this definition, "submitted" -means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, -and issue tracking systems that are managed by, or on behalf of, the -Licensor for the purpose of discussing and improving the Work, but -excluding communication that is conspicuously marked or otherwise -designated in writing by the copyright owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity -on behalf of whom a Contribution has been received by Licensor and -subsequently incorporated within the Work. - -2. Grant of Copyright License. 
Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the -Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -(except as stated in this section) patent license to make, have made, -use, offer to sell, sell, import, and otherwise transfer the Work, -where such license applies only to those patent claims licensable -by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) -with the Work to which such Contribution(s) was submitted. If You -institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work -or a Contribution incorporated within the Work constitutes direct -or contributory patent infringement, then any patent licenses -granted to You under this License for that Work shall terminate -as of the date such litigation is filed. - -4. Redistribution. 
You may reproduce and distribute copies of the -Work or Derivative Works thereof in any medium, with or without -modifications, and in Source or Object form, provided that You -meet the following conditions: - -(a) You must give any other recipients of the Work or -Derivative Works a copy of this License; and - -(b) You must cause any modified files to carry prominent notices -stating that You changed the files; and - -(c) You must retain, in the Source form of any Derivative Works -that You distribute, all copyright, patent, trademark, and -attribution notices from the Source form of the Work, -excluding those notices that do not pertain to any part of -the Derivative Works; and - -(d) If the Work includes a "NOTICE" text file as part of its -distribution, then any Derivative Works that You distribute must -include a readable copy of the attribution notices contained -within such NOTICE file, excluding those notices that do not -pertain to any part of the Derivative Works, in at least one -of the following places: within a NOTICE text file distributed -as part of the Derivative Works; within the Source form or -documentation, if provided along with the Derivative Works; or, -within a display generated by the Derivative Works, if and -wherever such third-party notices normally appear. The contents -of the NOTICE file are for informational purposes only and -do not modify the License. You may add Your own attribution -notices within Derivative Works that You distribute, alongside -or as an addendum to the NOTICE text from the Work, provided -that such additional attribution notices cannot be construed -as modifying the License. 
- -You may add Your own copyright statement to Your modifications and -may provide additional or different license terms and conditions -for use, reproduction, or distribution of Your modifications, or -for any such Derivative Works as a whole, provided Your use, -reproduction, and distribution of the Work otherwise complies with -the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, -any Contribution intentionally submitted for inclusion in the Work -by You to the Licensor shall be under the terms and conditions of -this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify -the terms of any separate license agreement you may have executed -with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade -names, trademarks, service marks, or product names of the Licensor, -except as required for reasonable and customary use in describing the -origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or -agreed to in writing, Licensor provides the Work (and each -Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -implied, including, without limitation, any warranties or conditions -of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -PARTICULAR PURPOSE. You are solely responsible for determining the -appropriateness of using or redistributing the Work and assume any -risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. 
In no event and under no legal theory, -whether in tort (including negligence), contract, or otherwise, -unless required by applicable law (such as deliberate and grossly -negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, -incidental, or consequential damages of any character arising as a -result of this License or out of the use or inability to use the -Work (including but not limited to damages for loss of goodwill, -work stoppage, computer failure or malfunction, or any and all -other commercial damages or losses), even if such Contributor -has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing -the Work or Derivative Works thereof, You may choose to offer, -and charge a fee for, acceptance of support, warranty, indemnity, -or other liability obligations and/or rights consistent with this -License. However, in accepting such obligations, You may act only -on Your own behalf and on Your sole responsibility, not on behalf -of any other Contributor, and only if You agree to indemnify, -defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason -of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - -To apply the Apache License to your work, attach the following -boilerplate notice, with the fields enclosed by brackets "{}" -replaced with your own identifying information. (Don't include -the brackets!) The text should be enclosed in the appropriate -comment syntax for the file format. We also recommend that a -file or class name and description of purpose be included on the -same "printed page" as the copyright notice for easier -identification within third-party archives. 
- -Copyright {yyyy} {name of copyright owner} - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/docs/articles/overview.html b/docs/articles/a-overview.html similarity index 50% rename from docs/articles/overview.html rename to docs/articles/a-overview.html index 0ecb52a..38b50b0 100644 --- a/docs/articles/overview.html +++ b/docs/articles/a-overview.html @@ -8,14 +8,17 @@ Overview • arcticdatautils - - + + + + -
+
-
+
+

2018-11-07

+ + + + +
-

-Abbreviated API overview:

+Abbreviated API overview
  • -publish_update:
  • +publish_update(): +
    • Mint a DOI for a package
    • -
    • Replace the metadata for a package, from a local file
    • +
    • Replace the metadata for a package
    • Add/remove data in a package
    • +
    +
  • -publish_object: Use before publish_update if you’re adding new data to a package.
  • +publish_object(): Use before publish_update() if you’re adding new data to a package
  • -update_resource_map: Edit the set of child packages for a package
  • +update_resource_map(): Edit the set of child packages for a package
  • -create_resource_map: Useful for creating a new package from scratch. For both project-level metadata packages or dataset-level packages.
  • +create_resource_map(): Useful for creating a new package from scratch. For both project-level metadata packages or dataset-level packages
  • -set_rights_and_access: Use this to give a user edit rights to a package
  • +set_rights_and_access(): Use this to give a user edit rights to a package

The package does way more than this but the above are the most common tasks.

-
@@ -96,11 +134,13 @@

Contents

-

Site built with pkgdown.

+

Site built with pkgdown.

+ + diff --git a/docs/articles/basic-usage.html b/docs/articles/b-basic-usage.html similarity index 62% rename from docs/articles/basic-usage.html rename to docs/articles/b-basic-usage.html index e4f54ec..c7193ed 100644 --- a/docs/articles/basic-usage.html +++ b/docs/articles/b-basic-usage.html @@ -8,14 +8,17 @@ Basic Usage • arcticdatautils - - + + + + -
+
-
+
+

2018-11-07

+ + + + +
-

Usage scenarios

For a lot of editing tasks, we’ll first want to get some variables set up. For the following use cases, we’re going to be doing something to a package, which has a metadata file with the PID ‘X’ in it. Here’s how we set that up:

# Set up your environment first
 options(dataone_test_token = "...") # Set your token here
-env <- env_load("production")
+env <- env_load("production")
 
 # Set up some variables for later
 my_eml_file <- "/path/to/the/file/on/disk/eml.xml"
-pkg <- get_package(env$mn, metadata_pid)
+pkg <- get_package(env$mn, metadata_pid)

At this point, we can do a number of things.

Use: I want to update the metadata in a package with an edited EML file I have on my computer

-
publish_update(env$mn,
-               metadata_pid = pkg$metadata,
-               resource_map_pid = pkg$resource_map,
-               data_pids = pkg$data,
+
publish_update(env$mn,
+               metadata_pid = pkg$metadata,
+               resource_map_pid = pkg$resource_map,
+               data_pids = pkg$data,
                metadata_file_path = my_eml_file)

Use: Mint a DOI for the package

-
publish_update(env$mn,
-               metadata_pid = pkg$metadata,
-               resource_map_pid = pkg$resource_map,
-               data_pids = pkg$data,
+
publish_update(env$mn,
+               metadata_pid = pkg$metadata,
+               resource_map_pid = pkg$resource_map,
+               data_pids = pkg$data,
                use_doi = TRUE)
@@ -93,18 +128,18 @@

new_data_object_pid <- publish_object(filepath = new_object_path, format_id = "text/csv") -publish_update(env$mn, - metadata_pid = pkg$metadata, - resource_map_pid = pkg$resource_map, - data_pids = c(pkg$data, newnew_data_object_pid))

-
+publish_update(env$mn, + metadata_pid = pkg$metadata, + resource_map_pid = pkg$resource_map, + data_pids = c(pkg$data, new_data_object_pid))
+ + diff --git a/docs/articles/index.html b/docs/articles/index.html deleted file mode 100644 index 7e02f50..0000000 --- a/docs/articles/index.html +++ /dev/null @@ -1,102 +0,0 @@ - - - - - - - - -Articles • arcticdatautils - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- - - -
- - - -
-
-
-

All vignettes

-

- - -
-
-
- -
- - -
-

Site built with pkgdown.

-
- -
-
- - - diff --git a/docs/authors.html b/docs/authors.html index b2479ba..0e91c9f 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -18,14 +18,24 @@ + + + + + - + + + + + + - +
@@ -68,19 +106,43 @@ -
-
+
+
  • -

    Bryce Mecum. Author, maintainer. +

    Bryce Mecum. Author, maintainer.

  • -

    Matt Jones. Contributor. +

    Matt Jones. Contributor. +

    +
  • +
  • +

    Jesse Goldstein. Contributor. +
    Maintainer

    +
  • +
  • +

    Jeanette Clark. Contributor. +
    Maintainer

    +
  • +
  • +

    Dominic Mullen. Contributor. +

    +
  • +
  • +

    Emily O'Dean. Contributor. +

    +
  • +
  • +

    Robyn Thiessen-Bock. Contributor. +

    +
  • +
  • +

    Derek Strong. Contributor.

@@ -96,11 +158,14 @@

Authors

-

Site built with pkgdown.

+

Site built with pkgdown.

+ + + diff --git a/docs/docsearch.css b/docs/docsearch.css new file mode 100644 index 0000000..e5f1fe1 --- /dev/null +++ b/docs/docsearch.css @@ -0,0 +1,148 @@ +/* Docsearch -------------------------------------------------------------- */ +/* + Source: https://github.com/algolia/docsearch/ + License: MIT +*/ + +.algolia-autocomplete { + display: block; + -webkit-box-flex: 1; + -ms-flex: 1; + flex: 1 +} + +.algolia-autocomplete .ds-dropdown-menu { + width: 100%; + min-width: none; + max-width: none; + padding: .75rem 0; + background-color: #fff; + background-clip: padding-box; + border: 1px solid rgba(0, 0, 0, .1); + box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); +} + +@media (min-width:768px) { + .algolia-autocomplete .ds-dropdown-menu { + width: 175% + } +} + +.algolia-autocomplete .ds-dropdown-menu::before { + display: none +} + +.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { + padding: 0; + background-color: rgb(255,255,255); + border: 0; + max-height: 80vh; +} + +.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { + margin-top: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion { + padding: 0; + overflow: visible +} + +.algolia-autocomplete .algolia-docsearch-suggestion--category-header { + padding: .125rem 1rem; + margin-top: 0; + font-size: 1.3em; + font-weight: 500; + color: #00008B; + border-bottom: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { + float: none; + padding-top: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { + float: none; + width: auto; + padding: 0; + text-align: left +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content { + float: none; + width: auto; + padding: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content::before { + display: none +} + +.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { + padding-top: .75rem; + margin-top: .75rem; + border-top: 1px solid rgba(0, 
0, 0, .1) +} + +.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { + display: block; + padding: .1rem 1rem; + margin-bottom: 0.1; + font-size: 1.0em; + font-weight: 400 + /* display: none */ +} + +.algolia-autocomplete .algolia-docsearch-suggestion--title { + display: block; + padding: .25rem 1rem; + margin-bottom: 0; + font-size: 0.9em; + font-weight: 400 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--text { + padding: 0 1rem .5rem; + margin-top: -.25rem; + font-size: 0.8em; + font-weight: 400; + line-height: 1.25 +} + +.algolia-autocomplete .algolia-docsearch-footer { + width: 110px; + height: 20px; + z-index: 3; + margin-top: 10.66667px; + float: right; + font-size: 0; + line-height: 0; +} + +.algolia-autocomplete .algolia-docsearch-footer--logo { + background-image: url("data:image/svg+xml;utf8,"); + background-repeat: no-repeat; + background-position: 50%; + background-size: 100%; + overflow: hidden; + text-indent: -9000px; + width: 100%; + height: 100%; + display: block; + transform: translate(-8px); +} + +.algolia-autocomplete .algolia-docsearch-suggestion--highlight { + color: #FF8C00; + background: rgba(232, 189, 54, 0.1) +} + + +.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { + box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) +} + +.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { + background-color: rgba(192, 192, 192, .15) +} diff --git a/docs/docsearch.js b/docs/docsearch.js new file mode 100644 index 0000000..b35504c --- /dev/null +++ b/docs/docsearch.js @@ -0,0 +1,85 @@ +$(function() { + + // register a handler to move the focus to the search bar + // upon pressing shift + "/" (i.e. 
"?") + $(document).on('keydown', function(e) { + if (e.shiftKey && e.keyCode == 191) { + e.preventDefault(); + $("#search-input").focus(); + } + }); + + $(document).ready(function() { + // do keyword highlighting + /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ + var mark = function() { + + var referrer = document.URL ; + var paramKey = "q" ; + + if (referrer.indexOf("?") !== -1) { + var qs = referrer.substr(referrer.indexOf('?') + 1); + var qs_noanchor = qs.split('#')[0]; + var qsa = qs_noanchor.split('&'); + var keyword = ""; + + for (var i = 0; i < qsa.length; i++) { + var currentParam = qsa[i].split('='); + + if (currentParam.length !== 2) { + continue; + } + + if (currentParam[0] == paramKey) { + keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); + } + } + + if (keyword !== "") { + $(".contents").unmark({ + done: function() { + $(".contents").mark(keyword); + } + }); + } + } + }; + + mark(); + }); +}); + +/* Search term highlighting ------------------------------*/ + +function matchedWords(hit) { + var words = []; + + var hierarchy = hit._highlightResult.hierarchy; + // loop to fetch from lvl0, lvl1, etc. 
+ for (var idx in hierarchy) { + words = words.concat(hierarchy[idx].matchedWords); + } + + var content = hit._highlightResult.content; + if (content) { + words = words.concat(content.matchedWords); + } + + // return unique words + var words_uniq = [...new Set(words)]; + return words_uniq; +} + +function updateHitURL(hit) { + + var words = matchedWords(hit); + var url = ""; + + if (hit.anchor) { + url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; + } else { + url = hit.url + '?q=' + escape(words.join(" ")); + } + + return url; +} diff --git a/docs/index.html b/docs/index.html index e08a7fe..478ed03 100644 --- a/docs/index.html +++ b/docs/index.html @@ -5,11 +5,15 @@ -Arctic Data Utilities • arcticdatautils +Utilities for the Arctic Data Center • arcticdatautils - - + + + + @@ -24,18 +28,47 @@ - arcticdatautils + + arcticdatautils + 0.6.4 +
+
@@ -46,50 +79,55 @@
-
+
-

The articadatautils R package contains code for:

+arcticdatautils
+ +

The arcticdatautils package contains code for doing lots of useful stuff that’s too specific for the dataone package:

    -
  • Inserting large numbers of files into Metacat
  • -
  • High-level rdataone wrappers for editing objects and Data Packages
  • +
  • Inserting large numbers of files into a Metacat Member Node
  • +
  • High-level dataone wrappers for working with Objects and Data Packages that streamline Arctic Data Center operations

Note: The package is intended to be used by NCEAS staff and may not make much sense to others.

Installing

-

I recommend installing from the latest release (aka tag) instead of from master. Install from release with:

-
devtools::install_github("NCEAS/arcticdatautils", ref = "{TAG_NAME_HERE}")
+

We recommend installing from the latest release (aka tag) instead of from master. Install the latest release with the remotes package:

+
remotes::install_github("nceas/arcticdatautils@*release")

If you’re feeling adventurous, you can install from the bleeding edge:

-
devtools::install_github("NCEAS/arcticdatautils")
-
-
-

-Contributing

-

Please submit suggestions or bugs as Issues.

-
-
-

-Testing

-

Some tests are dependent on an authentication token being set and be skipped if one is not set.

-
# Skips tests that depend on a Metacat instance:
-devtools::test()
-
-# Set a token to run skipped tests:
-options(dataone_test_token = "...")
-devtools::test()
+
remotes::install_github("nceas/arcticdatautils")
@@ -99,11 +137,13 @@

Developers

-

Site built with pkgdown.

+

Site built with pkgdown.

+ + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index e704e06..6ca2f37 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -1,13 +1,32 @@ -/* Sticker footer */ +/* Sticky footer */ + +/** + * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ + * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css + * + * .Site -> body > .container + * .Site-content -> body > .container .row + * .footer -> footer + * + * Key idea seems to be to ensure that .container and __all its parents__ + * have height set to 100% + * + */ + +html, body { + height: 100%; +} + body > .container { display: flex; - padding-top: 60px; - min-height: calc(100vh); + height: 100%; flex-direction: column; + + padding-top: 60px; } body > .container .row { - flex: 1; + flex: 1 0 auto; } footer { @@ -16,6 +35,7 @@ footer { border-top: 1px solid #e5e5e5; color: #666; display: flex; + flex-shrink: 0; } footer p { margin-bottom: 0; @@ -34,13 +54,20 @@ img.icon { float: right; } -/* Section anchors ---------------------------------*/ +img { + max-width: 100%; +} -.hasAnchor { - margin-left: -30px; +/* Typographic tweaking ---------------------------------*/ + +.contents h1.page-header { + margin-top: calc(-60px + 1em); } +/* Section anchors ---------------------------------*/ + a.anchor { + margin-left: -30px; display:inline-block; width: 30px; height: 30px; @@ -56,13 +83,31 @@ a.anchor { visibility: visible; } +@media (max-width: 767px) { + .hasAnchor:hover a.anchor { + visibility: hidden; + } +} + + /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { padding-top: 60px; - margin-top: -60px; + margin-top: -40px; +} + +/* Static header placement on mobile devices */ +@media (max-width: 767px) { + .navbar-fixed-top { + position: absolute; + } + .navbar { + padding: 0; + } } + /* Sidebar --------------------------*/ #sidebar { @@ -81,10 +126,14 @@ a.anchor { 
margin-bottom: 0.5em; } +.orcid { + height: 16px; + vertical-align: middle; +} + /* Reference index & topics ----------------------------------------------- */ .ref-index th {font-weight: normal;} -.ref-index h2 {font-size: 20px;} .ref-index td {vertical-align: top;} .ref-index .alias {width: 40%;} @@ -107,31 +156,77 @@ table { /* Syntax highlighting ---------------------------------------------------- */ -code { - background-color: #f7f7f7; - color: #333; +pre { + word-wrap: normal; + word-break: normal; + border: 1px solid #eee; } -code a { - color: #375f84; + +pre, code { + background-color: #f8f8f8; + color: #333; } -.warning { color: red; } -.message { font-weight: bolder; } -.error { color: red; font-weight: bolder; } +pre code { + overflow: auto; + word-wrap: normal; + white-space: pre; +} -.fl,.number {color:rgb(21,20,181);} -.fu,.functioncall {color:#264D66 ;} -.ch,.st,.string {color:#375D81 ;} -.kw,.keyword {color:black;} -.argument {color:#264D66 ;} -.co,.comment {color: #777;} -.formalargs {color: #264D66;} -.eqformalargs {color:#264D66;} -.slot {font-style:italic;} -.symbol {color:black ;} -.prompt {color:black ;} +pre .img { + margin: 5px 0; +} -pre img { +pre .img img { background-color: #fff; display: block; + height: auto; +} + +code a, pre a { + color: #375f84; +} + +a.sourceLine:hover { + text-decoration: none; +} + +.fl {color: #1514b5;} +.fu {color: #000000;} /* function */ +.ch,.st {color: #036a07;} /* string */ +.kw {color: #264D66;} /* keyword */ +.co {color: #888888;} /* comment */ + +.message { color: black; font-weight: bolder;} +.error { color: orange; font-weight: bolder;} +.warning { color: #6A0366; font-weight: bolder;} + +/* Clipboard --------------------------*/ + +.hasCopyButton { + position: relative; +} + +.btn-copy-ex { + position: absolute; + right: 0; + top: 0; + visibility: hidden; +} + +.hasCopyButton:hover button.btn-copy-ex { + visibility: visible; +} + +/* mark.js ----------------------------*/ + +mark { + 
background-color: rgba(255, 255, 51, 0.5); + border-bottom: 2px solid rgba(255, 153, 51, 0.3); + padding: 1px; +} + +/* vertical spacing after htmlwidgets */ +.html-widget { + margin-bottom: 10px; } diff --git a/docs/pkgdown.js b/docs/pkgdown.js index c8b38c4..de9bd72 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -1,8 +1,110 @@ -$(function() { - $("#sidebar").stick_in_parent({offset_top: 40}); - $('body').scrollspy({ - target: '#sidebar', - offset: 60 +/* http://gregfranko.com/blog/jquery-best-practices/ */ +(function($) { + $(function() { + + $("#sidebar") + .stick_in_parent({offset_top: 40}) + .on('sticky_kit:bottom', function(e) { + $(this).parent().css('position', 'static'); + }) + .on('sticky_kit:unbottom', function(e) { + $(this).parent().css('position', 'relative'); + }); + + $('body').scrollspy({ + target: '#sidebar', + offset: 60 + }); + + $('[data-toggle="tooltip"]').tooltip(); + + var cur_path = paths(location.pathname); + var links = $("#navbar ul li a"); + var max_length = -1; + var pos = -1; + for (var i = 0; i < links.length; i++) { + if (links[i].getAttribute("href") === "#") + continue; + var path = paths(links[i].pathname); + + var length = prefix_length(cur_path, path); + if (length > max_length) { + max_length = length; + pos = i; + } + } + + // Add class to parent
  • , and enclosing
  • if in dropdown + if (pos >= 0) { + var menu_anchor = $(links[pos]); + menu_anchor.parent().addClass("active"); + menu_anchor.closest("li.dropdown").addClass("active"); + } }); -}); + function paths(pathname) { + var pieces = pathname.split("/"); + pieces.shift(); // always starts with / + + var end = pieces[pieces.length - 1]; + if (end === "index.html" || end === "") + pieces.pop(); + return(pieces); + } + + function prefix_length(needle, haystack) { + if (needle.length > haystack.length) + return(0); + + // Special case for length-0 haystack, since for loop won't run + if (haystack.length === 0) { + return(needle.length === 0 ? 1 : 0); + } + + for (var i = 0; i < haystack.length; i++) { + if (needle[i] != haystack[i]) + return(i); + } + + return(haystack.length); + } + + /* Clipboard --------------------------*/ + + function changeTooltipMessage(element, msg) { + var tooltipOriginalTitle=element.getAttribute('data-original-title'); + element.setAttribute('data-original-title', msg); + $(element).tooltip('show'); + element.setAttribute('data-original-title', tooltipOriginalTitle); + } + + if(Clipboard.isSupported()) { + $(document).ready(function() { + var copyButton = ""; + + $(".examples, div.sourceCode").addClass("hasCopyButton"); + + // Insert copy buttons: + $(copyButton).prependTo(".hasCopyButton"); + + // Initialize tooltips: + $('.btn-copy-ex').tooltip({container: 'body'}); + + // Initialize clipboard: + var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { + text: function(trigger) { + return trigger.parentNode.textContent; + } + }); + + clipboardBtnCopies.on('success', function(e) { + changeTooltipMessage(e.trigger, 'Copied!'); + e.clearSelection(); + }); + + clipboardBtnCopies.on('error', function() { + changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); + }); + }); + } +})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml new file mode 100644 index 0000000..a346e19 --- /dev/null +++ 
b/docs/pkgdown.yml @@ -0,0 +1,7 @@ +pandoc: 1.19.2.1 +pkgdown: 1.1.0 +pkgdown_sha: ~ +articles: + a-overview: a-overview.html + b-basic-usage: b-basic-usage.html + diff --git a/docs/reference/add_methods_step.html b/docs/reference/add_methods_step.html index 1c33a86..656e147 100644 --- a/docs/reference/add_methods_step.html +++ b/docs/reference/add_methods_step.html @@ -6,7 +6,7 @@ -Adds a step to the methods document — add_methods_step • arcticdatautils +Add a methods step — add_methods_step • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Adds a step to the methods document

    +

    Add a methods step to an EML document.

    +
    -
    add_methods_step(doc, title, description)
    +
    add_methods_step(doc, title, description)
    -

    Arguments

    +

    Arguments

    @@ -99,9 +144,15 @@

    Ar

    Value

    -

    (eml) The modified EML document

    +

    (eml) The modified EML document.

    +

    Examples

    +
    # NOT RUN {
    +eml <- read_eml("~/Documents/metadata.xml")
    +eml <- add_methods_step(eml, "Field Sampling", "Samples were
    +collected using a niskin water sampler.")
    +# }
    @@ -120,11 +173,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/arcticdatautils.html b/docs/reference/arcticdatautils.html new file mode 100644 index 0000000..501ff50 --- /dev/null +++ b/docs/reference/arcticdatautils.html @@ -0,0 +1,155 @@ + + + + + + + + +arcticdatautils: Utilities for the Arctic Data Center — arcticdatautils • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This package contains code for doing lots of useful stuff that's too specific for the +dataone package, primarily functions that streamline Arctic Data Center operations.

    + +
    + + + +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/clear_methods.html b/docs/reference/clear_methods.html index d3af406..d6e8a08 100644 --- a/docs/reference/clear_methods.html +++ b/docs/reference/clear_methods.html @@ -6,7 +6,7 @@ -Clear all methods from the document. — clear_methods • arcticdatautils +Clear all methods — clear_methods • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Clear all methods from the document.

    +

    Clear all methods from an EML document.

    +
    -
    clear_methods(doc)
    +
    clear_methods(doc)
    -

    Arguments

    +

    Arguments

    @@ -91,9 +136,14 @@

    Ar

    Value

    -

    (eml) The modified document.

    +

    (eml) The modified EML document.

    +

    Examples

    +
    # NOT RUN {
    +eml <- read_eml("~/Documents/metadata.xml")
    +eml <- clear_methods(eml)
    +# }
    @@ -112,11 +164,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/convert_iso_to_eml.html b/docs/reference/convert_iso_to_eml.html index 64e1b01..55ab348 100644 --- a/docs/reference/convert_iso_to_eml.html +++ b/docs/reference/convert_iso_to_eml.html @@ -6,7 +6,7 @@ -Convert and ISO document to EML using an XSLT. — convert_iso_to_eml • arcticdatautils +Convert an ISO document to EML using an XSLT — convert_iso_to_eml • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +

    Leave style=NA if you want to use the default ISO-to-EML stylesheet.

    +
    -
    convert_iso_to_eml(path, style = NA)
    +
    convert_iso_to_eml(path, style = NA)
    -

    Arguments

    +

    Arguments

    @@ -98,6 +143,11 @@

    Value

    (character) Location of the converted file.

    +

    Examples

    +
    # NOT RUN {
    +iso_path <- "~/Documents/ISO_metadata.xml"
    +eml_path <- convert_iso_to_eml(iso_path)
    +# }
    @@ -116,11 +168,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_dummy_attributes_dataframe.html b/docs/reference/create_dummy_attributes_dataframe.html new file mode 100644 index 0000000..cfe18eb --- /dev/null +++ b/docs/reference/create_dummy_attributes_dataframe.html @@ -0,0 +1,181 @@ + + + + + + + + +Create test attributes data.frame — create_dummy_attributes_dataframe • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Create a test data.frame of attributes.

    + +
    + +
    create_dummy_attributes_dataframe(numberAttributes, factors = NULL)
    + +

    Arguments

    +
    + + + + + + + + + +
    numberAttributes

    (integer) Number of attributes to be created in the table.

    factors

    (character) Optional vector of factor names to include.

    + +

    Value

    + +

    (data.frame) A data.frame of attributes.

    + + +

    Examples

    +
    # NOT RUN {
    +# Create dummy attribute dataframe with 6 attributes, 2 of which are factors
    +attributes <- create_dummy_attributes_dataframe(6, c("Factor1", "Factor2"))
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/create_dummy_enumeratedDomain_dataframe.html b/docs/reference/create_dummy_enumeratedDomain_dataframe.html new file mode 100644 index 0000000..1a1ffc7 --- /dev/null +++ b/docs/reference/create_dummy_enumeratedDomain_dataframe.html @@ -0,0 +1,177 @@ + + + + + + + + +Create test enumeratedDomain data.frame — create_dummy_enumeratedDomain_dataframe • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Create a test data.frame of enumeratedDomains.

    + +
    + +
    create_dummy_enumeratedDomain_dataframe(factors)
    + +

    Arguments

    + + + + + + +
    factors

    (character) Vector of factor names to include.

    + +

    Value

    + +

    (data.frame) A data.frame of factors.

    + + +

    Examples

    +
    # NOT RUN {
    +# Create dummy dataframe of 2 factors/enumerated domains
    +attributes <- create_dummy_enumeratedDomain_dataframe(c("Factor1", "Factor2"))
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/create_dummy_metadata.html b/docs/reference/create_dummy_metadata.html index 06d315f..7c2efab 100644 --- a/docs/reference/create_dummy_metadata.html +++ b/docs/reference/create_dummy_metadata.html @@ -6,7 +6,7 @@ -helpers.R — create_dummy_metadata • arcticdatautils +Create a test metadata object — create_dummy_metadata • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,20 +109,23 @@ -
    +
    +
    -

    Various helper functions for things like testing the package. -Create a test metadata object.

    +

    Create a test EML metadata object.

    +
    -
    create_dummy_metadata(mn, data_pids = NULL)
    +
    create_dummy_metadata(mn, data_pids = NULL)
    -

    Arguments

    +

    Arguments

    @@ -94,13 +138,28 @@

    Ar

    +

    Value

    +

    (character) PID of published metadata document.

    + + +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- create_dummy_metadata(mn)
    +# }
  • Value
  • + +
  • Examples
  • + @@ -111,11 +170,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_dummy_object.html b/docs/reference/create_dummy_object.html index 9b7af7d..0b7490f 100644 --- a/docs/reference/create_dummy_object.html +++ b/docs/reference/create_dummy_object.html @@ -6,7 +6,7 @@ -Create a test object. — create_dummy_object • arcticdatautils +Create a test object — create_dummy_object • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create a test object.

    +

    Create a test data object.

    +
    -
    create_dummy_object(mn)
    +
    create_dummy_object(mn)
    -

    Arguments

    +

    Arguments

    @@ -89,13 +134,29 @@

    Ar

    +

    Value

    + +

    (character) The PID of the dummy object.

    + +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +pid <- create_dummy_object(mn)
    +# }
    @@ -106,11 +167,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_dummy_package.html b/docs/reference/create_dummy_package.html index 8dc3f51..c6025cb 100644 --- a/docs/reference/create_dummy_package.html +++ b/docs/reference/create_dummy_package.html @@ -6,7 +6,7 @@ -Create a test package. — create_dummy_package • arcticdatautils +Create a test package — create_dummy_package • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create a test package.

    +

    Create a test data package.

    +
    -
    create_dummy_package(mn, size = 2)
    +
    create_dummy_package(mn, size = 2)
    -

    Arguments

    +

    Arguments

    @@ -89,17 +134,33 @@

    Ar

    - +
    size

    (numeric) The number of files in the package.

    (numeric) The number of files in the package, including the metadata file.

    +

    Value

    +

    (character) A named character vector of the data PIDs in the package.

    + + +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +#Create dummy package with 5 data objects and 1 metadata object
    +pids <- create_dummy_package(mn, 6)
    +# }
    @@ -110,11 +171,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_dummy_package_full.html b/docs/reference/create_dummy_package_full.html new file mode 100644 index 0000000..de2b87e --- /dev/null +++ b/docs/reference/create_dummy_package_full.html @@ -0,0 +1,180 @@ + + + + + + + + +Create dummy package with fuller metadata — create_dummy_package_full • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Creates a fuller package than create_dummy_package() +but is otherwise based on the same concept. This dummy +package includes multiple data objects, responsible parties, +geographic locations, method steps, etc.

    + +
    + +
    create_dummy_package_full(mn, title = "A Dummy Package")
    + +

    Arguments

    + + + + + + + + + + +
    mn

    (MNode) The Member Node.

    title

    (character) Optional. Title of package. Defaults to "A Dummy Package".

    + +

    Value

    + +

    (list) A list of package PIDs, including for the resource map, metadata, and data objects.

    + + +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/create_dummy_parent_package.html b/docs/reference/create_dummy_parent_package.html index 5e97b41..cd47a79 100644 --- a/docs/reference/create_dummy_parent_package.html +++ b/docs/reference/create_dummy_parent_package.html @@ -6,7 +6,7 @@ -Create a test parent package. — create_dummy_parent_package • arcticdatautils +Create a test parent package — create_dummy_parent_package • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create a test parent package.

    +

    Create a test parent data package.

    +
    -
    create_dummy_parent_package(mn, children)
    +
    create_dummy_parent_package(mn, children)
    -

    Arguments

    +

    Arguments

    @@ -93,13 +138,26 @@

    Ar

    +

    Value

    +

    pid (character) A named character vector of PIDs, including parent package and child package PIDs.

    + + +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +# }
    +
    @@ -110,11 +168,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/create_resource_map.html b/docs/reference/create_resource_map.html index 683785d..3cd7788 100644 --- a/docs/reference/create_resource_map.html +++ b/docs/reference/create_resource_map.html @@ -6,7 +6,7 @@ -Create a resource map Object on a Member Node. — create_resource_map • arcticdatautils +Create a resource map object on a Member Node — create_resource_map • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,22 +111,26 @@ -
    +
    +

    This function first generates a new resource map RDF/XML document locally and -then uses the dataone::createObject function to create the Object on the +then uses the dataone::createObject() function to create the object on the specified MN.

    +
    -
    create_resource_map(mn, metadata_pid, data_pids = NULL, child_pids = NULL,
    -  check_first = TRUE)
    +
    create_resource_map(mn, metadata_pid, data_pids = NULL,
    +  child_pids = NULL, check_first = TRUE, ...)
    -

    Arguments

    +

    Arguments

    @@ -92,13 +139,11 @@

    Ar

    - + - + @@ -108,21 +153,35 @@

    Ar

    +arguments exist on the MN before continuing. This speeds up the function, +especially when data_pids has many elements.

    + + + +
    metadata_pid

    (character) The PID of the metadata object to go in the -package.

    (character) The PID of the metadata object to go in the package.

    data_pids

    (character) The PID(s) of the data objects to go in the -package.

    (character) The PID(s) of the data objects to go in the package.

    child_pids
    check_first

    (logical) Optional. Whether to check the PIDs passed in as -aruments exist on the MN before continuing. This speeds up the function, -especially when `data_pids` has many elements.

    ...

    Additional arguments that can be passed into publish_object().

    Value

    -

    (character) The created resource map's PID

    +

    (character) The PID of the created resource map.

    Details

    -

    If you only want to generate resource map RDF/XML, see -generate_resource_map

    +

    If you only want to generate resource map RDF/XML, see generate_resource_map().

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +meta_pid <- 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe'
    +dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1',
    +'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe')
    +
    +create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid)
    +# }
    @@ -143,11 +204,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_abstract.html b/docs/reference/eml_abstract.html new file mode 100644 index 0000000..16caca3 --- /dev/null +++ b/docs/reference/eml_abstract.html @@ -0,0 +1,178 @@ + + + + + + + + +Create an EML abstract — eml_abstract • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Create an EML abstract.

    + +
    + +
    eml_abstract(text)
    + +

    Arguments

    + + + + + + +
    text

    (character) Paragraphs of text with one paragraph per element in the +character vector.

    + +

    Value

    + +

    (abstract) An EML abstract.

    + + +

    Examples

    +
    # Set an abstract with a single paragraph +eml_abstract("Test abstract...")
    #> <abstract>hi</abstract>
    +# Or one with multiple paragraphs +eml_abstract(c("First para...", "second para..."))
    #> <abstract/>
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_add_entities.html b/docs/reference/eml_add_entities.html index 227f2d2..158f3ea 100644 --- a/docs/reference/eml_add_entities.html +++ b/docs/reference/eml_add_entities.html @@ -6,7 +6,7 @@ -Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table. — eml_add_entities • arcticdatautils +Add new entity elements to an EML document from a table — eml_add_entities • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,36 +109,40 @@ -
    +
    +
    -

    Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table.

    +

    Add new entity elements to an EML document from a table.

    +
    -
    eml_add_entities(doc, entities,
    +    
    eml_add_entities(doc, entities,
       resolve_base = "https://cn.dataone.org/cn/v2/resolve/")
    -

    Arguments

    +

    Arguments

    - + - + +should not override the default value.

    doc

    (eml) An EML document

    (eml) An EML document.

    entities

    (data.frame) A data.frame with columns path, pid, and -format_id

    (data.frame) A data.frame with columns type, path, pid, and +format_id.

    resolve_base

    (character) Optional. Specify a DataONE CN resolve base URI which will be used for serializing download URLs into the EML. Most users - should not override the default value.

    @@ -107,22 +152,32 @@

    Value

    Examples

    -
    # Create entities from files on disk -## Not run: ------------------------------------ -# types <- c("dataTable") -# paths <- list.files(., full.names = TRUE) # Get full paths to some files -# pids <- vapply(paths, function(x) { paste0("urn:uuid:", uuid::UUIDgenerate()) }, "") # Generate some UUID PIDs -# format_ids <- guess_format_id(paths) # Try to guess format IDs, you should check this afterwards -# -# entity_df <- data.frame(type = types, -# path = paths, -# pid = pids, -# format_id = format_ids, -# stringsAsFactors = FALSE) -# -# doc <- new("eml") -# doc <- eml_add_entities(doc, entity_df) -## ---------------------------------------------
    +
    # Create entities from files on disk +
    # NOT RUN { + types <- c("dataTable") + paths <- list.files(".", full.names = TRUE) # Get full paths to some files + pids <- vapply(paths, function(x) { + paste0("urn:uuid:", uuid::UUIDgenerate()) + }, "") # Generate some UUID PIDs +# Try to guess format IDs, you should check this afterwards + format_ids <- guess_format_id(paths) + + entity_df <- data.frame(type = types, + path = paths, + pid = pids, + format_id = format_ids, + stringsAsFactors = FALSE) + + doc <- new("eml") + doc <- eml_add_entities(doc, entity_df) +# } +
    +# Read in a CSV containing the info about files on disk +
    # NOT RUN { + entity_df <- read.csv("./my_entities.csv", stringsAsFactors = FALSE) + doc <- new("eml") + doc <- eml_add_entities(doc, entity_df) +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_address.html b/docs/reference/eml_address.html index 93ee051..5daf6e7 100644 --- a/docs/reference/eml_address.html +++ b/docs/reference/eml_address.html @@ -6,7 +6,7 @@ -Create an EML address element. — eml_address • arcticdatautils +Create an EML address element — eml_address • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create an EML address element.

    +

    A simple way to create an EML address element.

    +
    -
    eml_address(delivery_points, city, administrative_area, postal_code)
    +
    eml_address(delivery_points, city, administrative_area, postal_code)
    -

    Arguments

    +

    Arguments

    @@ -89,15 +134,15 @@

    Ar

    - + - + - +
    city

    (character) City

    (character) City.

    administrative_area

    (character) Administrative area

    (character) Administrative area.

    postal_code

    (character) Postal code

    (character) Postal code.

    @@ -106,6 +151,8 @@

    Value

    (address) An EML address object.

    +

    Examples

    +
    NCEASadd <- eml_address("735 State St #300", "Santa Barbara", "CA", "93101")
    @@ -124,11 +173,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_associated_party.html b/docs/reference/eml_associated_party.html index 3a73689..f957bb8 100644 --- a/docs/reference/eml_associated_party.html +++ b/docs/reference/eml_associated_party.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,64 +109,43 @@ -
    +
    +
    -

    Create an EML associatedParty

    +

    See eml_party() for details.

    +
    -
    eml_associated_party(given_names, sur_name, organization = NULL,
    -  email = NULL, phone = NULL, address = NULL, role)
    +
    eml_associated_party(...)
    -

    Arguments

    +

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - + +
    given_names

    (character) One or more given (first) names.

    sur_name

    (character) A sur (last) name.

    organization

    (character) One or more organization names.

    email

    (character) An email address.

    phone

    (character) A phone number.

    address

    (address) An object of type 'address' (EML).

    role

    (character) A role

    ...

    Arguments passed on to eml_party().

    Value

    -

    (associatedParty) The new associatedParty

    +

    (associatedParty) The new associatedParty.

    Examples

    -
    eml_associated_party("test", "user", "test@user.com", role = "Principal Investigator")
    #> <associatedParty system="uuid"> +
    eml_associated_party("test", "user", email = "test@user.com", role = "Principal Investigator")
    #> <associatedParty system="uuid"> #> <individualName> #> <givenName>test</givenName> #> <surName>user</surName> #> </individualName> -#> <organizationName>test@user.com</organizationName> +#> <electronicMailAddress>test@user.com</electronicMailAddress> #> <role>Principal Investigator</role> #> </associatedParty>
    @@ -148,11 +168,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_contact.html b/docs/reference/eml_contact.html index 6057737..060384c 100644 --- a/docs/reference/eml_contact.html +++ b/docs/reference/eml_contact.html @@ -6,7 +6,7 @@ -Create an EML contact. — eml_contact • arcticdatautils +Create an EML contact — eml_contact • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,61 +109,43 @@ -
    +
    +
    -

    Create an EML contact.

    +

    See eml_party() for details.

    +
    -
    eml_contact(given_names, sur_name, organization = NULL, email = NULL,
    -  phone = NULL, address = NULL)
    +
    eml_contact(...)
    -

    Arguments

    +

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - + +
    given_names

    (character) One or more given (first) names.

    sur_name

    (character) A sur (last) name.

    organization

    (character) One or more organization names.

    email

    (character) An email address.

    phone

    (character) A phone number.

    address

    (address) An object of type 'address' (EML).

    ...

    Arguments passed on to eml_party().

    Value

    -

    (contact) The new contact

    +

    (contact) The new contact.

    Examples

    -
    eml_contact("test", "user", "test@user.com")
    #> <contact system="uuid"> -#> <individualName> -#> <givenName>test</givenName> -#> <surName>user</surName> -#> </individualName> -#> <organizationName>test@user.com</organizationName> -#> </contact>
    +
    # NOT RUN {
    +eml_contact("test", "user", email = "test@user.com")
    +eml_contact("Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766")
    +eml_contact(c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"),
    +            c("Data Scientist", "Programmer"))
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_creator.html b/docs/reference/eml_creator.html index a1003fd..fa35872 100644 --- a/docs/reference/eml_creator.html +++ b/docs/reference/eml_creator.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,61 +109,43 @@ -
    +
    +
    -

    Create an EML creator

    +

    See eml_party() for details.

    +
    -
    eml_creator(given_names, sur_name, organization = NULL, email = NULL,
    -  phone = NULL, address = NULL)
    +
    eml_creator(...)
    -

    Arguments

    +

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - + +
    given_names

    (character) One or more given (first) names.

    sur_name

    (character) A sur (last) name.

    organization

    (character) One or more organization names.

    email

    (character) An email address.

    phone

    (character) A phone number.

    address

    (address) An object of type 'address' (EML).

    ...

    Arguments passed on to eml_party().

    Value

    -

    (creator) The new creator

    +

    (creator) The new creator.

    Examples

    -
    eml_creator("test", "user", "test@user.com")
    #> <creator system="uuid"> -#> <individualName> -#> <givenName>test</givenName> -#> <surName>user</surName> -#> </individualName> -#> <organizationName>test@user.com</organizationName> -#> </creator>
    +
    # NOT RUN {
    +eml_creator("test", "user", email = "test@user.com")
    +eml_creator("Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766")
    +eml_creator(c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"),
    +            c("Data Scientist", "Programmer"))
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_geographic_coverage.html b/docs/reference/eml_geographic_coverage.html new file mode 100644 index 0000000..eeb7953 --- /dev/null +++ b/docs/reference/eml_geographic_coverage.html @@ -0,0 +1,194 @@ + + + + + + + + +Create an EML geographicCoverage section — eml_geographic_coverage • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    A simple way to create an EML geographicCoverage section.

    + +
    + +
    eml_geographic_coverage(description, north, east, south, west)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + +
    description

    (character) A textual description.

    north

    (numeric) North bounding coordinate.

    east

    (numeric) East bounding coordinate.

    south

    (numeric) South bounding coordinate.

    west

    (numeric) West bounding coordinate.

    + +

    Value

    + +

    (geographicCoverage) The new geographicCoverage section.

    + +

    Details

    + +

    For a bounding box, all coordinates should be unique. +For a single point, the North and South bounding coordinates should be the same and +the East and West bounding coordinates should be the same.

    + + +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_individual_name.html b/docs/reference/eml_individual_name.html index 871b2f8..437815d 100644 --- a/docs/reference/eml_individual_name.html +++ b/docs/reference/eml_individual_name.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,19 +109,23 @@ -
    +
    +
    -

    Create an EML individualName section

    +

    Create an EML individualName section.

    +
    -
    eml_individual_name(given_names, sur_name)
    +
    eml_individual_name(given_names = NULL, sur_name)
    -

    Arguments

    +

    Arguments

    @@ -95,9 +140,14 @@

    Ar

    Value

    -

    (individualName) The new individualName section

    +

    (individualName) The new individualName section.

    +

    Examples

    +
    eml_individual_name("some", "user")
    #> <individualName> +#> <givenName>some</givenName> +#> <surName>user</surName> +#> </individualName>
    @@ -116,11 +168,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_metadata_provider.html b/docs/reference/eml_metadata_provider.html index c8bd5ee..9ffd574 100644 --- a/docs/reference/eml_metadata_provider.html +++ b/docs/reference/eml_metadata_provider.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,45 +109,28 @@ -
    +
    +
    -

    Create an EML metadataProvider

    +

    See eml_party() for details.

    +
    -
    eml_metadata_provider(given_names, sur_name, organization = NULL,
    -  email = NULL, phone = NULL, address = NULL)
    +
    eml_metadata_provider(...)
    -

    Arguments

    +

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - + +
    given_names

    (character) One or more given (first) names.

    sur_name

    (character) A sur (last) name.

    organization

    (character) One or more organization names.

    email

    (character) An email address.

    phone

    (character) A phone number.

    address

    (address) An object of type 'address' (EML).

    ...

    Arguments passed on to eml_party().

    @@ -116,12 +140,12 @@

    Value

    Examples

    -
    eml_metadata_provider("test", "user", "test@user.com")
    #> <metadataProvider system="uuid"> +
    eml_metadata_provider("test", "user", email = "test@user.com")
    #> <metadataProvider system="uuid"> #> <individualName> #> <givenName>test</givenName> #> <surName>user</surName> #> </individualName> -#> <organizationName>test@user.com</organizationName> +#> <electronicMailAddress>test@user.com</electronicMailAddress> #> </metadataProvider>
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_otherEntity_to_dataTable.html b/docs/reference/eml_otherEntity_to_dataTable.html new file mode 100644 index 0000000..019994a --- /dev/null +++ b/docs/reference/eml_otherEntity_to_dataTable.html @@ -0,0 +1,195 @@ + + + + + + + + +Convert otherEntities to dataTables — eml_otherEntity_to_dataTable • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +otherEntity object as currently constructed - it does not add a physical or add attributes. +However, if these are already in their respective slots, they will be retained.

    + +
    + +
    eml_otherEntity_to_dataTable(eml, otherEntity, validate_eml = TRUE)
    + +

    Arguments

    + + + + + + + + + + + + + + +
    eml

    (S4) An EML S4 object.

    otherEntity

    (S4 / integer) Either an EML otherEntity object or the index +of an otherEntity within a ListOfotherEntity. Integer input is recommended.

    validate_eml

    (logical) Optional. Whether or not to validate the EML after +completion. Setting this to FALSE reduces execution time by ~50 percent.

    + + +

    Examples

    +
    # NOT RUN {
    +eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils"))
    +
    +# The following two calls are equivalent:
    +eml <- eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]])
    +eml <- eml_otherEntity_to_dataTable(eml, 1)
    +
    +# Integer input is recommended:
    +eml <- eml_otherEntity_to_dataTable(eml, 1)
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_party.html b/docs/reference/eml_party.html new file mode 100644 index 0000000..87c3a18 --- /dev/null +++ b/docs/reference/eml_party.html @@ -0,0 +1,226 @@ + + + + + + + + +Create an EML party — eml_party • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    You will usually want to use the high-level functions such as +eml_creator() and eml_contact() but using this is fine.

    + +
    + +
    eml_party(type = "associatedParty", given_names = NULL,
    +  sur_name = NULL, organization = NULL, position = NULL,
    +  email = NULL, phone = NULL, address = NULL, userId = NULL,
    +  role = NULL)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    type

    (character) The type of party (e.g. 'contact').

    given_names

    (character) The party's given name(s).

    sur_name

    (character) The party's surname.

    organization

    (character) The party's organization name.

    position

    (character) The party's position.

    email

    (character) The party's email address(es).

    phone

    (character) The party's phone number(s).

    address

    (character) The party's address(es).

    userId

    (character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ.

    role

    (character) The party's role.

    + +

    Value

    + +

    (party) An instance of the party specified by the type argument.

    + +

    Details

    + +

    The userId argument assumes an ORCID so be sure to adjust for that.

    + + +

    Examples

    +
    # NOT RUN {
    +eml_party("creator", "Test", "User")
    +eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766")
    +eml_party("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"),
    +          c("Data Scientist", "Programmer"))
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_personnel.html b/docs/reference/eml_personnel.html new file mode 100644 index 0000000..dd69951 --- /dev/null +++ b/docs/reference/eml_personnel.html @@ -0,0 +1,185 @@ + + + + + + + + +Create an EML personnel — eml_personnel • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    See eml_party() for details.

    + +
    + +
    eml_personnel(role = NULL, ...)
    + +

    Arguments

    + + + + + + + + + + +
    role

    (character) Personnel role, e.g. "principalInvestigator".

    ...

    Arguments passed on to eml_party().

    + +

    Value

    + +

    (personnel) The new personnel.

    + + +

    Examples

    +
    eml_personnel("test", "user", email = "test@user.com", role = "principalInvestigator")
    #> <personnel system="uuid"> +#> <individualName> +#> <givenName>test</givenName> +#> <surName>user</surName> +#> </individualName> +#> <electronicMailAddress>test@user.com</electronicMailAddress> +#> <role>principalInvestigator</role> +#> </personnel>
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_project.html b/docs/reference/eml_project.html index 114e5b5..686c5ba 100644 --- a/docs/reference/eml_project.html +++ b/docs/reference/eml_project.html @@ -6,7 +6,7 @@ -Create an eml-project section. — eml_project • arcticdatautils +Create an EML project section — eml_project • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,45 +109,56 @@ -
    +
    +
    -

    Note: This is super-limited right now.

    +

    Create an EML project section.

    +
    -
    eml_project(title, awards, first, last, organizations = NULL,
    -  role = "originator")
    +
    eml_project(title, personnelList, abstract = NULL, funding = NULL,
    +  studyAreaDescription = NULL, designDescription = NULL,
    +  relatedProject = NULL)
    -

    Arguments

    +

    Arguments

    - + + + + + - - + + - - + + - - + + - - + + - - + +
    title

    (character) Title of the project.

    (character) Title of the project (Required). May have multiple titles.

    personnelList

    (list of personnel) Personnel involved with the project.

    awards

    (character) One or more awards for the project.

    abstract

    (character) Project abstract. Can pass as a character vector +for separate paragraphs.

    first

    (character) First name of the person with role `role`.

    funding

    (character) Funding sources for the project such as grant and +contract numbers. Can pass as a character vector for separate paragraphs.

    last

    (character) Last name of the person with role `role`.

    studyAreaDescription

    (studyAreaDescription)

    organizations

    (character) Optional. One or more organization strings.

    designDescription

    (designDescription)

    role

    (character) Optional. Specify an alternate role.

    relatedProject

    (project)

    @@ -114,18 +166,18 @@

    Value

    (project) The new project section.

    +

    Details

    + +

    Note - studyAreaDescription, designDescription, and relatedProject are not +fully fleshed out. Need to pass these objects in directly if you want to use +them.

    +

    Examples

    -
    eml_project("Some title", "51231", "Some", "User")
    #> <project system="uuid"> -#> <title>Some title</title> -#> <personnel> -#> <individualName> -#> <givenName>Some</givenName> -#> <surName>User</surName> -#> </individualName> -#> <role>originator</role> -#> </personnel> -#> </project>
    +
    proj <- eml_project(c("Some title", "A second title if needed"), + c(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), + c("Abstract paragraph 1", "Abstract paragraph 2"), + "Funding Agency: Award Number 12345")
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/eml_set_reference.html b/docs/reference/eml_set_reference.html new file mode 100644 index 0000000..f34dee5 --- /dev/null +++ b/docs/reference/eml_set_reference.html @@ -0,0 +1,200 @@ + + + + + + + + +Set a reference to an EML object — eml_set_reference • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function creates a new object with the same class as element_to_replace +using a reference to element_to_reference.

    + +
    + +
    eml_set_reference(element_to_reference, element_to_replace)
    + +

    Arguments

    + + + + + + + + + + +
    element_to_reference

    (S4) An EML object to reference.

    element_to_replace

    (S4) An EML object to replace with a reference.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- dataone::CNode('PROD')
    +adc <- dataone::getMNode(cn,'urn:node:ARCTIC')
    +eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M'))
    +
    +# Set the first contact as a reference to the first creator
    +eml@dataset@contact[[1]] <- eml_set_reference(eml@dataset@creator[[1]],
    +eml@dataset@contact[[1]])
    +
    +# This is also useful when we want to set references to a subset of 'dataTable'
    +# or 'otherEntity' objects
    +# Add a few more objects first to illustrate the use:
    +eml@dataset@dataTable[[3]] <- eml@dataset@dataTable[[1]]
    +eml@dataset@dataTable[[4]] <- eml@dataset@dataTable[[1]]
    +# Add references to the second and third elements only (not the 4th):
    +for (i in 2:3) {
    +    eml@dataset@dataTable[[i]]@attributeList <- eml_set_reference(eml@dataset@dataTable[[1]]@attributeList,
    +                                                      eml@dataset@dataTable[[i]]@attributeList)
    +}
    +# If we print the entire 'dataTable' list we see elements 2 and 3 have references while 4 does not.
    +eml@dataset@dataTable
    +# }
    +
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_set_shared_attributes.html b/docs/reference/eml_set_shared_attributes.html new file mode 100644 index 0000000..9d610de --- /dev/null +++ b/docs/reference/eml_set_shared_attributes.html @@ -0,0 +1,198 @@ + + + + + + + + +Set shared attribute references — eml_set_shared_attributes • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function sets shared attributes using the attributes of the first type +selected and creates references for all remaining objects of equivalent type.

    + +
    + +
    eml_set_shared_attributes(eml, attributeList = NULL,
    +  type = "dataTable")
    + +

    Arguments

    + + + + + + + + + + + + + + +
    eml

    (eml) An EML object.

    attributeList

    (attributeList) Optional. An EML attributeList object. If not provided +then it will default to the attributeList of the first type element.

    type

    (character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' attributeList +objects with references. Defaults to 'dataTable'.

    + +

    Value

    + +

    (eml) The modified EML document.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- dataone::CNode('PROD')
    +adc <- dataone::getMNode(cn,'urn:node:ARCTIC')
    +eml <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M'))
    +atts <- EML::set_attributes(EML::get_attributes(eml@dataset@dataTable[[1]]@attributeList)$attributes)
    +
    +eml <- eml_set_shared_attributes(eml, atts, type = 'dataTable')
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/eml_validate_attributes.html b/docs/reference/eml_validate_attributes.html index e836533..e6cd92e 100644 --- a/docs/reference/eml_validate_attributes.html +++ b/docs/reference/eml_validate_attributes.html @@ -18,14 +18,31 @@ + + + + + - + + + + + + + + + - +
    @@ -68,48 +113,47 @@ -
    +
    +

    The attributes passed into this function are validated one-by-one and the progress of going through each attribute is printed to the screen along -with any and all validation issues.

    +with any and all validation issues. This is done by, for each attribute in the list, +creating a minimum valid EML document and adding a new otherEntity with a new +attributeList containing the single attribute to be validated.

    +
    -
    eml_validate_attributes(attributes)
    +
    eml_validate_attributes(attributes)
    -

    Arguments

    +

    Arguments

    - +
    attributes

    (attributeList) An attributeList

    (attributeList) An attributeList.

    Value

    -

    (boolean) Named vector of TRUE/FALSE indicating which attributes -are valid

    - -

    Details

    - -

    This is done by, for each attribute in the list, creating a minimum valid -EML document and adding a new otherEntity with a new attributeList containing -the single attribute to be validated.

    +

    (logical) Named vector indicating which attributes are valid.

    Examples

    -
    ## Not run: ------------------------------------ -# atts_df <- read.csv('attributes_table.csv', stringsAsFactors = F) -# enum_domain <- read.csv('enumerated_domain.csv') # optional -# attributes <- EML::set_attributes(atts_df, factor = enum_domain) -# eml_validate_attributes(attributes) -## ---------------------------------------------
    +
    # NOT RUN {
    +atts_df <- read.csv('attributes_table.csv', stringsAsFactors = F)
    +enum_domain <- read.csv('enumerated_domain.csv') # optional
    +attributes <- EML::set_attributes(atts_df, factor = enum_domain)
    +eml_validate_attributes(attributes)
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/env_get.html b/docs/reference/env_get.html index 76e4c73..3a3adbe 100644 --- a/docs/reference/env_get.html +++ b/docs/reference/env_get.html @@ -6,8 +6,7 @@ -environment.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu> — env_get • arcticdatautils +Get the current environment name — env_get • arcticdatautils @@ -19,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -69,20 +109,21 @@ -
    +
    +
    -

    Functions related to loading configuriation based upon the environment -the code is being run under. -Get the current environment name.

    +

    Get the current environment name.

    +
    -
    env_get()
    +
    env_get()

    Value

    @@ -106,11 +147,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/find_newest_object.html b/docs/reference/find_newest_object.html index 0dc25d3..bd3c004 100644 --- a/docs/reference/find_newest_object.html +++ b/docs/reference/find_newest_object.html @@ -6,7 +6,7 @@ -Find the newest (by dateUploaded) object within a given set of objects. — find_newest_object • arcticdatautils +Find the newest object within the given set of objects — find_newest_object • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,32 +109,36 @@ -
    +
    +
    -

    Find the newest (by dateUploaded) object within a given set of objects.

    +

    Find the newest object, based on dateUploaded, within the given set of objects.

    +
    -
    find_newest_object(node, identifiers, rows = 1000)
    +
    find_newest_object(node, identifiers, rows = 1000)
    -

    Arguments

    +

    Arguments

    - + - - + + - - + +
    node

    (MNode | CNode) The node to query

    (MNode/CNode) The Member Node to query.

    rows

    (numeric) Optional. Specify the size of the query result set.

    identifiers

    (character) One or more identifiers.

    identiifers

    (character) One or more identifiers

    rows

    (numeric) Optional. Specify the size of the query result set.

    @@ -104,10 +149,10 @@

    Value

    Examples

    -
    ## Not run: ------------------------------------ -# mn <- MNode(...) -# find_newest_object(mn, c("PIDX", "PIDY", "PIDZ")) -## ---------------------------------------------
    +
    # NOT RUN {
    +mn <- MNode(...)
    +find_newest_object(mn, c("PIDX", "PIDY", "PIDZ"))
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/format_eml.html b/docs/reference/format_eml.html index 092895a..15d9c74 100644 --- a/docs/reference/format_eml.html +++ b/docs/reference/format_eml.html @@ -6,7 +6,7 @@ -Helper function to generate the EML 2.1.1 format ID. — format_eml • arcticdatautils +Generate the EML 2.1.1 format ID — format_eml • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,17 +109,21 @@ -
    +
    +
    -

    Helper function to generate the EML 2.1.1 format ID.

    +

    Returns the EML 2.1.1 format ID.

    +
    -
    format_eml()
    +
    format_eml()

    Value

    @@ -86,15 +131,11 @@

    Value

    Examples

    -
    format_eml
    #> function() { -#> "eml://ecoinformatics.org/eml-2.1.1" -#> } -#> <environment: namespace:arcticdatautils>
    -## Not run: ------------------------------------ -# # Upload a local EML 2.1.1 file: -# env <- env_load() -# publish_object(env$mn, "path_to_some_EML_file", format_eml()) -## ---------------------------------------------
    +
    format_eml()
    #> [1] "eml://ecoinformatics.org/eml-2.1.1"
    # NOT RUN { +# Upload a local EML 2.1.1 file: +env <- env_load() +publish_object(env$mn, "path_to_some_EML_file", format_eml()) +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/format_iso.html b/docs/reference/format_iso.html index a1763b5..f5d68c8 100644 --- a/docs/reference/format_iso.html +++ b/docs/reference/format_iso.html @@ -6,7 +6,7 @@ -dataone_formats.R — format_iso • arcticdatautils +Generate the ISO 19139 format ID — format_iso • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,22 +109,21 @@ -
    +
    +
    -

    A set of thin functions which return the DataONE format ID string. These are -to aid in filling in function arguments and can't remember or don't want to -type in the full format ID. By putting these format ID strings into -functions, a user's autocompletion routine in their editor can help them -fill in the format ID they want. -Helper function to generate the ISO 19139 format ID.w

    +

    Returns the ISO 19139 format ID.

    +
    -
    format_iso()
    +
    format_iso()

    Value

    @@ -91,11 +131,11 @@

    Value

    Examples

    -
    format_iso()
    #> [1] "http://www.isotc211.org/2005/gmd"
    ## Not run: ------------------------------------ -# # Upload a local ISO19139 XML file: -# env <- env_load() -# publish_object(env$mn, "path_to_some_EML_file", format_iso()) -## ---------------------------------------------
    +
    format_iso()
    #> [1] "http://www.isotc211.org/2005/gmd"
    # NOT RUN { +# Upload a local ISO19139 XML file: +env <- env_load() +publish_object(env$mn, "path_to_some_EML_file", format_iso()) +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/generate_resource_map.html b/docs/reference/generate_resource_map.html index 5af2e1f..a3ed02d 100644 --- a/docs/reference/generate_resource_map.html +++ b/docs/reference/generate_resource_map.html @@ -6,9 +6,7 @@ -Create a resource map RDF/XML file and save is to a temporary path. -This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`. — generate_resource_map • arcticdatautils +Create a resource map RDF/XML file and save is to a temporary path — generate_resource_map • arcticdatautils @@ -20,14 +18,28 @@ + + + + + - + + + + + + + + + - +
    @@ -70,62 +110,67 @@ -
    +
    +
    -

    Create a resource map RDF/XML file and save is to a temporary path. -This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`.

    +

    This is a convenience wrapper around the constructor of the ResourceMap +class from DataPackage.

    +
    -
    generate_resource_map(metadata_pid, data_pids = NULL, child_pids = NULL,
    -  other_statements = NULL,
    +    
    generate_resource_map(metadata_pid, data_pids = NULL,
    +  child_pids = NULL, other_statements = NULL,
       resolve_base = "https://cn.dataone.org/cn/v2/resolve",
       resource_map_pid = NULL)
    -

    Arguments

    +

    Arguments

    - + - + - + - + + + + +
    metadata_pid

    (character) PID of the metadata Object.

    (character) PID of the metadata object.

    data_pids

    (character) PID(s) of the data Objects.

    (character) PID(s) of the data objects.

    child_pids

    (character) Optional. PID(s) of child Resource Maps.

    (character) Optional. PID(s) of child resource maps.

    other_statements

    (data.frame) Extra statements to add to the Resource Map.

    (data.frame) Extra statements to add to the resource map.

    resolve_base

    (character) Optional. The resolve service base URL.

    resource_map_pid

    (character) The PID of a resource map.

    Value

    -

    Absolute path to the Resource Map on disk (character)

    +

    (character) Absolute path to the resource map on disk.

    Examples

    -
    ## Not run: ------------------------------------ -# generate_resource_map("X", "Y", "Z", -# other_statements = data.frame(subject="http://example.com/me", -# predicate="http://example.com/foo", -# object="http://example.com/bar")) -## ---------------------------------------------
    +
    # NOT RUN {
    +generate_resource_map("X", "Y", "Z",
    +                      other_statements = data.frame(subject="http://example.com/me",
    +                                                    predicate="http://example.com/foo",
    +                                                    object="http://example.com/bar"))
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_all_sysmeta.html b/docs/reference/get_all_sysmeta.html new file mode 100644 index 0000000..39be3c6 --- /dev/null +++ b/docs/reference/get_all_sysmeta.html @@ -0,0 +1,205 @@ + + + + + + + + +Get system metadata for all elements of a data package — get_all_sysmeta • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function retrieves the system metadata for all elements of a data package and returns them as a list. +It is useful for inspecting system metadata for an entire data package and identifying changes where needed.

    + +
    + +
    get_all_sysmeta(mn, resource_map_pid, nmax = 1000,
    +  child_packages = FALSE)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    mn

    (MNode) The Member Node to query.

    resource_map_pid

    (character) The PID for a resource map.

    nmax

    (numeric) The maximum number of system metadata objects to return.

    child_packages

    (logical) If parent package, whether or not to include child packages.

    + +

    Value

    + +

    (list) A list of system metadata objects.

    + + +

    Examples

    +
    # NOT RUN {
    +cn_staging <- CNode("STAGING")
    +adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC")
    +
    +rm_pid <- "resource_map_urn:uuid:..."
    +
    +all <- get_all_sysmeta(adc_test, rm_pid)
    +
    +# View in viewer to inspect
    +View(all)
    +
    +# Print specific elements to console
    +all[[1]]@rightsHolder
    +
    +# Create separate object
    +sysmeta_md <- all[[2]]
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/get_all_versions.html b/docs/reference/get_all_versions.html index 79e7286..ae5c09d 100644 --- a/docs/reference/get_all_versions.html +++ b/docs/reference/get_all_versions.html @@ -6,7 +6,7 @@ -Get the PIDs of all versions of an object. — get_all_versions • arcticdatautils +Get the PIDs of all versions of an object — get_all_versions • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,24 +109,28 @@ -
    +
    +

    Get the PIDs of all versions of an object.

    +
    -
    get_all_versions(node, pid)
    +
    get_all_versions(node, pid)
    -

    Arguments

    +

    Arguments

    - + @@ -98,6 +143,14 @@

    Value

    (character) A vector of PIDs in the chain, in order.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1"
    +
    +ids <- get_all_versions(mn, pid)
    +# }
    @@ -116,11 +171,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_mn_base_url.html b/docs/reference/get_mn_base_url.html index 7c07e6d..2fb6ef5 100644 --- a/docs/reference/get_mn_base_url.html +++ b/docs/reference/get_mn_base_url.html @@ -6,7 +6,7 @@ -Get the base URL of the Member Node. — get_mn_base_url • arcticdatautils +Get base URL of a Member Node — get_mn_base_url • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,34 +109,51 @@ -
    +
    +
    -

    Get the base URL of the Member Node.

    +

    Get the base URL of a Member Node.

    +
    -
    get_mn_base_url(mn)
    +
    get_mn_base_url(mn)
    -

    Arguments

    +

    Arguments

    node

    (MNode|CNode) The node to query.

    (MNode) The Member Node to query.

    pid
    - +
    mn

    (character) The Member Node.

    +

    Value

    +

    (character) The URL.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +# }
    @@ -106,11 +164,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_ncdf4_attributes.html b/docs/reference/get_ncdf4_attributes.html index d8b5dd6..c0dbc54 100644 --- a/docs/reference/get_ncdf4_attributes.html +++ b/docs/reference/get_ncdf4_attributes.html @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,36 +109,40 @@ -
    +
    +
    -

    Get a data.frame of attributes from a NetCDF object

    +

    Get a data.frame of attributes from a NetCDF object.

    +
    -
    get_ncdf4_attributes(nc)
    +
    get_ncdf4_attributes(nc)
    -

    Arguments

    +

    Arguments

    - +
    nc

    (ncdf4 or character) Either a ncdf4 object or a file path

    (ncdf4/character) Either a ncdf4 object or a file path.

    Value

    -

    (data.frame) A data.frame of the attributes

    +

    (data.frame) A data.frame of the attributes.

    Examples

    -
    ## Not run: ------------------------------------ -# get_ncdf4_attributes("./path/to/my.nc") -## ---------------------------------------------
    +
    # NOT RUN {
    +get_ncdf4_attributes("./path/to/my.nc")
    +# }
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_package.html b/docs/reference/get_package.html index f7a7e89..7091f7c 100644 --- a/docs/reference/get_package.html +++ b/docs/reference/get_package.html @@ -6,7 +6,7 @@ -Get a structured list of PIDs for the objects in a package. — get_package • arcticdatautils +Get a structured list of PIDs for the objects in a package — get_package • arcticdatautils @@ -18,14 +18,28 @@ + + + + + - + + + + + + + + + - +
    @@ -68,29 +110,33 @@ -
    +
    +
    -

    This is a wrapper function around `get_package_direct` which takes either -a resource map PID or a metadata PID as its `pid` argument.

    +

    Get a structured list of PIDs for the objects in a package, +including the resource map, metadata, and data objects.

    +
    -
    get_package(node, pid, file_names = FALSE, rows = 1000)
    +
    get_package(node, pid, file_names = FALSE, rows = 5000)
    -

    Arguments

    +

    Arguments

    - + - + @@ -99,7 +145,7 @@

    Ar

    +useful to set if you are warned about the result set being truncated. Defaults to 5000.

    node

    (MNode|CNode) The Coordinating/Member Node to run the query on.

    (MNode/CNode) The Coordinating/Member Node to run the query on.

    pid

    (character) The the metadata PID of the package.

    (character) The resource map PID of the package.

    file_names
    rows

    (numeric) The number of rows to return in the query. This is only -useful to set if you are warned about the result set being truncated.

    @@ -108,6 +154,15 @@

    Value

    (list) A structured list of the members of the package.

    +

    Examples

    +
    # NOT RUN {
    +#Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- "resource_map_urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1"
    +
    +ids <- get_package(mn, pid)
    +# }
    @@ -126,11 +183,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/get_token.html b/docs/reference/get_token.html index bb02aed..ab6c47e 100644 --- a/docs/reference/get_token.html +++ b/docs/reference/get_token.html @@ -6,7 +6,7 @@ -Gets the currently set authentication token. — get_token • arcticdatautils +Get the currently set authentication token — get_token • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,24 +109,28 @@ -
    +
    +
    -

    Gets the currently set authentication token.

    +

    Get the currently set authentication token.

    +
    -
    get_token(node)
    +
    get_token(node)
    -

    Arguments

    +

    Arguments

    - +
    node

    (MNode|CNode) The CN or MN you want to find a token for.

    (MNode/CNode) The Member/Coordinating Node to query.

    @@ -94,6 +139,12 @@

    Value

    (character) The token.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +get_token(mn)
    +# }
    @@ -112,11 +165,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/guess_format_id.html b/docs/reference/guess_format_id.html index c66849b..061ba66 100644 --- a/docs/reference/guess_format_id.html +++ b/docs/reference/guess_format_id.html @@ -6,7 +6,7 @@ -Guess format from filename for a vector of filenames. — guess_format_id • arcticdatautils +Guess format from filename — guess_format_id • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,32 +109,38 @@ -
    +
    +

    Guess format from filename for a vector of filenames.

    +
    -
    guess_format_id(filenames)
    +
    guess_format_id(filenames)
    -

    Arguments

    +

    Arguments

    - +
    filenames

    (character)

    (character) A vector of filenames.

    Value

    -

    (character) DataOne format identifiers strings.

    +

    (character) DataONE format IDs.

    +

    Examples

    +
    formatid <- guess_format_id("temperature_data.csv")
    @@ -112,11 +161,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 90e4f06..1e7dd43 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -18,14 +18,24 @@ + + + + + - + + + + + + - + @@ -68,692 +106,432 @@ -
    -
    +
    +
    -
    - +
    - - - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -

    All functions

    -

    -
    -

    add_access_rules

    -

    sysmeta.R

    -

    add_additional_identifiers

    -

    Add a set of additional identifiers to an EML document.

    -

    add_admin_group_access

    -

    Adds access to the given System Metadata for the arctic-data-admins group

    -

    add_methods_step

    -

    Adds a step to the methods document

    -

    add_string_to_title

    -

    Adds a string to the title element in the given file.

    -

    change_eml_name

    -

    Utility function to extract a name string from an XML individualName node, -parse it into tokens,and reformat the individualName with new children nodes

    -

    clear_methods

    -

    Clear all methods from the document.

    -

    clear_replication_policy

    -

    Clear the replication policy from a System Metadata object

    -

    convert_iso_to_eml

    -

    Convert and ISO document to EML using an XSLT.

    -

    create_dummy_metadata

    -

    helpers.R

    -

    create_dummy_object

    -

    Create a test object.

    -

    create_dummy_package

    -

    Create a test package.

    -

    create_dummy_parent_package

    -

    Create a test parent package.

    -

    create_from_folder

    -

    inserting.R

    -

    create_object

    -

    Create an object from a row of the inventory.

    -

    create_resource_map

    -

    Create a resource map Object on a Member Node.

    -

    create_sysmeta

    -

    Create a sysmeta object.

    -

    determine_child_pids

    -

    Calculate a set of child PIDs for a given package.

    -

    eml_add_entities

    -

    Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table.

    -

    eml_address

    -

    Create an EML address element.

    -

    eml_associated_party

    -

    Create an EML associatedParty

    -

    eml_contact

    -

    Create an EML contact.

    -

    eml_creator

    -

    Create an EML creator

    -

    eml_individual_name

    -

    Create an EML individualName section

    -

    eml_metadata_provider

    -

    Create an EML metadataProvider

    -

    eml_project

    -

    Create an eml-project section.

    -

    eml_validate_attributes

    -

    Validate an EML attributeList attribute-by-attribute

    -

    env_get

    -

    environment.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    env_load

    -

    Load environmental variables from a YAML-formatted environment file.

    -

    extract_local_identifier

    -

    util.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    filter_obsolete_pids

    -

    Filters PIDs that are obsolete.

    -

    filter_packaging_statements

    -

    Filter statements related to packaging

    -

    find_newest_object

    -

    Find the newest (by dateUploaded) object within a given set of objects.

    -

    find_newest_resource_map

    -

    Get the resource map(s) for the given object.

    -

    fix_bad_enum

    -

    Fix a metadata record with a bad topicCategory.

    -

    fix_bad_topic

    -

    Fix a metadata record with multiple MD_TopicCategory children elements -inside a single topicCategory element.

    -

    format_eml

    -

    Helper function to generate the EML 2.1.1 format ID.

    -

    format_iso

    -

    dataone_formats.R

    -

    generate_resource_map_pid

    -

    Generate a PID for a new resource map by appending "resource_map_" to it.

    -

    generate_resource_map

    -

    Create a resource map RDF/XML file and save is to a temporary path. -This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`.

    -

    get_all_versions

    -

    Get the PIDs of all versions of an object.

    -

    get_chain_neighbors

    -

    Get the obsoleted/obsoletedBy properties of an object as a named list.

    -

    get_current_version

    -

    Get the current package version.

    -

    get_doc_id

    -

    Get the Metacat docid for the given identifier

    -

    get_identifier

    -

    Get the identifier from a DataONE response.

    -

    get_latest_release

    -

    Use the GitHub API to find the latest release for the package.

    -

    get_mn_base_url

    -

    Get the base URL of the Member Node.

    -

    get_ncdf4_attributes

    -

    Get a data.frame of attributes from a NetCDF object

    -

    get_netcdf_format_id

    -

    Determine the DataONE format ID for the NetCDF file provided by path.

    -

    get_or_create_pid

    -

    Get the already-minted PID from the inventory or mint a new one.

    -

    get_package_direct

    -

    Get a structured list of PIDs for the objects in a package.

    -

    get_package

    -

    Get a structured list of PIDs for the objects in a package.

    -

    get_token_subject

    -

    Returns the subject of the set dataone_test_token

    -

    get_token

    -

    Gets the currently set authentication token.

    -

    guess_format_id

    -

    Guess format from filename for a vector of filenames.

    -

    insert_file

    -

    package.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    insert_package

    -

    Create a single package Data Package from files in the Inventory.

    -

    inv_add_extra_columns

    -

    Adds a set of extra columsn to the inventory that are useful for working -with them.

    -

    inv_add_parent_package_column

    -

    Add a column for parent packages.

    -

    inv_init

    -

    inventory.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    inv_load_checksums

    -

    Load checksums into the inventory file from a text file. This function -removes the column 'checksum_sha256' from inventory before doing a -left join.

    -

    inv_load_dois

    -

    Load DOIs from a text file into the Inventory.

    -

    inv_load_files

    -

    Load files into the inventory from a text file.

    -

    inv_load_identifiers

    -

    Load identifiers into the inventory file(s) from a text file. This function -removes the column 'identifier' from inventory before doing a -left join.

    -

    inv_load_sizes

    -

    Load file sizes into an inventory from a text file. Removes the column -'size_bytes' from inventory before doing a left join.

    -

    inv_update

    -

    Update an Inventory with a new Inventory.

    -

    is_authorized

    -

    Check if the user has authorization to perform an action on an object.

    -

    is_format_id

    -

    Test whether an object is a particular format ID.

    -

    is_obsolete

    -

    Test whether the object is obsoleted by another object.

    -

    is_resource_map

    -

    Determines whether the object with the given PID is a resource map.

    -

    is_token_expired

    -

    Determine whether the set token is expired.

    -

    is_token_set

    -

    dataone.R

    -

    log_message

    -

    Log a message to the console and to a logfile.

    -

    new_uuid

    -

    Helper function to generate a new UUID PID.

    -

    object_exists

    -

    Check if an object exists on a Member Node.

    -

    parse_resource_map

    -

    Parse a Resource Map into a data.frame

    -

    path_join

    -

    (Intelligently) join (possibly redudant) path parts together.

    -

    pid_to_other_entity

    -

    eml.R

    -

    pretty_print

    -

    Uses XMLStarlet to pretty-print/beautify an XML document.

    -

    publish_object

    -

    editing.R

    -

    publish_update

    -

    Publish an updated data package.

    -

    remove_public_access

    -

    Remove all public read access rules from a System Metadata document

    -

    remove_public_read

    -

    Remove public access on a set of objects.

    -

    replace_package_id

    -

    Replace the EML 'packageId' attribute on the root element with a -certain value.

    -

    replace_subject

    -

    Replace subjects in the accessPolicy section of a System Metadata entries.

    -

    set_abstract

    -

    Set the abstract on an EML document

    -

    set_access

    -

    Set the access policy for a set of objects.

    -

    set_file_name

    -

    Set the file name on an object

    -

    set_other_entities

    -

    Creates and sets EML otherEntity elements to an existing EML document, -replacing any existing otherEntities

    -

    set_public_read

    -

    Set public access on a set of objects.

    -

    set_rights_and_access

    -

    Set the given subject as the rightsHolder and subject with write and -changePermission access for the given PID.

    -

    set_rights_holder

    -

    access.R

    -

    show_random_dataset

    -

    Print a random dataset.

    -

    substitute_eml_party

    -

    Extract the EML responsible-party blocks in a document, and parse the -surName field to create proper givenName/surName structure

    -

    sysmeta_to_eml_physical

    -

    Create an EML physical subtree from a System Metadata instance

    -

    sysmeta_to_other_entity

    -

    Create an EML otherEntity sub-tree for the given object.

    -

    test_has_abstract

    -

    modify_metadata.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    theme_packages

    -

    marking.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    update_object

    -

    Update an object with a new file.

    -

    update_package

    -

    Update a package with modified metadata.

    -

    update_resource_map

    -

    Update an existing resource map Object on a Member Node.

    -

    validate_environment

    -

    Validate an environment.

    -

    validate_inventory

    -

    Validate an Inventory.

    -

    view_packages

    -

    interactive.R -Author: Bryce Mecum <mecum@nceas.ucsb.edu>

    -

    view_profile

    -

    Get an approximate list of the datasets in a user's profile

    -

    warn_current_version

    -

    Warns if the currently-installed version of the package is not the same -version as the latest release on GitHub.

    -
    + + + +

    All functions

    +

    + + + + + +

    add_methods_step()

    + +

    Add a methods step

    + + + +

    arcticdatautils

    + +

    arcticdatautils: Utilities for the Arctic Data Center

    + + + +

    clear_methods()

    + +

    Clear all methods

    + + + +

    convert_iso_to_eml()

    + +

    Convert an ISO document to EML using an XSLT

    + + + +

    create_dummy_attributes_dataframe()

    + +

    Create test attributes data.frame

    + + + +

    create_dummy_enumeratedDomain_dataframe()

    + +

    Create test enumeratedDomain data.frame

    + + + +

    create_dummy_metadata()

    + +

    Create a test metadata object

    + + + +

    create_dummy_object()

    + +

    Create a test object

    + + + +

    create_dummy_package()

    + +

    Create a test package

    + + + +

    create_dummy_package_full()

    + +

    Create dummy package with fuller metadata

    + + + +

    create_dummy_parent_package()

    + +

    Create a test parent package

    + + + +

    create_resource_map()

    + +

    Create a resource map object on a Member Node

    + + + +

    eml_abstract()

    + +

    Create an EML abstract

    + + + +

    eml_add_entities()

    + +

    Add new entity elements to an EML document from a table

    + + + +

    eml_address()

    + +

    Create an EML address element

    + + + +

    eml_associated_party()

    + +

    Create an EML associatedParty

    + + + +

    eml_contact()

    + +

    Create an EML contact

    + + + +

    eml_creator()

    + +

    Create an EML creator

    + + + +

    eml_geographic_coverage()

    + +

    Create an EML geographicCoverage section

    + + + +

    eml_individual_name()

    + +

    Create an EML individualName section

    + + + +

    eml_metadata_provider()

    + +

    Create an EML metadataProvider

    + + + +

    eml_otherEntity_to_dataTable()

    + +

    Convert otherEntities to dataTables

    + + + +

    eml_party()

    + +

    Create an EML party

    + + + +

    eml_personnel()

    + +

    Create an EML personnel

    + + + +

    eml_project()

    + +

    Create an EML project section

    + + + +

    eml_set_reference()

    + +

    Set a reference to an EML object

    + + + +

    eml_set_shared_attributes()

    + +

    Set shared attribute references

    + + + +

    eml_validate_attributes()

    + +

    Validate an EML attributeList attribute-by-attribute

    + + + +

    env_get()

    + +

    Get the current environment name

    + + + +

    find_newest_object()

    + +

    Find the newest object within the given set of objects

    + + + +

    format_eml()

    + +

    Generate the EML 2.1.1 format ID

    + + + +

    format_iso()

    + +

    Generate the ISO 19139 format ID

    + + + +

    generate_resource_map()

    + +

    Create a resource map RDF/XML file and save it to a temporary path

    + + + +

    get_all_sysmeta()

    + +

    Get system metadata for all elements of a data package

    + + + +

    get_all_versions()

    + +

    Get the PIDs of all versions of an object

    + + + +

    get_mn_base_url()

    + +

    Get base URL of a Member Node

    + + + +

    get_ncdf4_attributes()

    + +

    Get a data.frame of attributes from a NetCDF object

    + + + +

    get_package()

    + +

    Get a structured list of PIDs for the objects in a package

    + + + +

    get_token()

    + +

    Get the currently set authentication token

    + + + +

    guess_format_id()

    + +

    Guess format from filename

    + + + +

    is_authorized()

    + +

    Check if user has authorization to perform an action on an object

    + + + +

    is_obsolete()

    + +

    Test whether the object is obsoleted by another object

    + + + +

    is_public_read()

    + +

    Check whether an object has public read access

    + + + +

    is_token_expired()

    + +

    Determine whether token is expired

    + + + +

    is_token_set()

    + +

    Test whether a token is set

    + + + +

    mdq_run()

    + +

    Score a metadata document against a MetaDIG suite

    + + + +

    new_uuid()

    + +

    Generate a new UUID PID

    + + + +

    object_exists()

    + +

    Check if an object exists on a Member Node

    + + + +

    parse_resource_map()

    + +

    Parse a resource map into a data.frame

    + + + +

    pid_to_eml_entity()

    + +

    Create EML entity from a DataONE PID

    + + + +

    pid_to_eml_physical()

    + +

    Create EML physical objects for the given set of PIDs

    + + + +

    publish_object()

    + +

    Publish an object on a Member Node

    + + + +

    publish_update()

    + +

    Publish an updated data package

    + + + +

    remove_public_read()

    + +

    Remove public read access for an object

    + + + +

    set_abstract()

    + +

    Set the abstract for an EML document

    + + + +

    set_access()

    + +

    Set the access policy for an object

    + + + +

    set_file_name()

    + +

    Set the file name for an object

    + + + +

    set_public_read()

    + +

    Set public read access for an object

    + + + +

    set_rights_and_access()

    + +

    Set rights holder with access policy for an object

    + + + +

    set_rights_holder()

    + +

    Set the rights holder for an object

    + + + +

    show_indexing_status()

    + +

    Show the indexing status of a set of PIDs

    + + + +

    sysmeta_to_eml_physical()

    + +

    Create an EML physical object from system metadata

    + + + +

    update_object()

    + +

    Update an object with a new file

    + + + +

    update_package_object()

    + +

    Update a data object and associated resource map and metadata

    + + + +

    update_resource_map()

    + +

    Update an existing resource map object on a Member Node

    + + + +

    view_profile()

    + +

    Get an approximate list of the datasets in a user's profile

    + + + +

    which_in_eml()

    + +

    Search through EMLs

    + + +
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/is_authorized.html b/docs/reference/is_authorized.html index 7c712a9..734b739 100644 --- a/docs/reference/is_authorized.html +++ b/docs/reference/is_authorized.html @@ -6,7 +6,7 @@ -Check if the user has authorization to perform an action on an object. — is_authorized • arcticdatautils +Check if user has authorization to perform an action on an object — is_authorized • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - +
    @@ -68,24 +109,28 @@ -
    +
    +

    Check if the user has authorization to perform an action on an object.

    +
    -
    is_authorized(node, ids, action)
    +
    is_authorized(node, ids, action)
    -

    Arguments

    +

    Arguments

    - + @@ -97,13 +142,29 @@

    Ar

    node

    (MNode|CNode) The Node to query.

    (MNode/CNode) The Member/Coordinating Node to query.

    ids
    +

    Value

    +

    (logical)

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +is_authorized(mn, pids, "write")
    +# }
    @@ -114,11 +175,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/is_obsolete.html b/docs/reference/is_obsolete.html index ad326ca..7b07aa0 100644 --- a/docs/reference/is_obsolete.html +++ b/docs/reference/is_obsolete.html @@ -6,7 +6,7 @@ -Test whether the object is obsoleted by another object. — is_obsolete • arcticdatautils +Test whether the object is obsoleted by another object — is_obsolete • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Test whether the object is obsoleted by another object.

    +

    Test whether the object is obsoleted by another object

    +
    -
    is_obsolete(node, pids)
    +
    is_obsolete(node, pids)
    -

    Arguments

    +

    Arguments

    @@ -98,6 +143,15 @@

    Value

    (logical) Whether or not the object is obsoleted by another object.

    +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1"
    +
    +is_obsolete(mn, pid)
    +# }
    @@ -116,11 +172,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/is_public_read.html b/docs/reference/is_public_read.html new file mode 100644 index 0000000..8e3630b --- /dev/null +++ b/docs/reference/is_public_read.html @@ -0,0 +1,192 @@ + + + + + + + + +Check whether an object has public read access — is_public_read • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Check whether objects have public read access. +No token needs to be set to use this function.

    + +
    + +
    is_public_read(mn, pids, use.names = TRUE)
    + +

    Arguments

    +
    + + + + + + + + + + + + + +
    mn

    (MNode) The Member Node.

    pids

    (character) The PIDs of the objects to check for public read access.

    use.names

    (logical) If TRUE, PIDs will +be used as names for the result unless PIDs have names already, in which case +those names will be used for the result.

    + +

    Value

    + +

    (logical) Whether an object has public read access.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +    "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +is_public_read(mn, pids)
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/is_token_expired.html b/docs/reference/is_token_expired.html index 856e1d2..c637b07 100644 --- a/docs/reference/is_token_expired.html +++ b/docs/reference/is_token_expired.html @@ -6,7 +6,7 @@ -Determine whether the set token is expired. — is_token_expired • arcticdatautils +Determine whether token is expired — is_token_expired • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,24 +109,52 @@ -
    +
    +

    Determine whether the set token is expired.

    +
    + +
    is_token_expired(node)
    + +

    Arguments

    + + + + + + +
    node

    (character) The Member Node.

    + +

    Value

    -
    is_token_expired(node)
    - +

    (logical)

    + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +is_token_expired(mn)
    +# }
    @@ -96,11 +165,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/is_token_set.html b/docs/reference/is_token_set.html index b40f86f..dae1161 100644 --- a/docs/reference/is_token_set.html +++ b/docs/reference/is_token_set.html @@ -6,7 +6,7 @@ -dataone.R — is_token_set • arcticdatautils +Test whether a token is set — is_token_set • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,33 +109,42 @@ -
    +
    +
    -

    Helpers for the DataONE R package. -Test whether a token is set.

    +

    Test whether a token is set.

    +
    -
    is_token_set(node)
    +
    is_token_set(node)
    -

    Arguments

    +

    Arguments

    - +
    node

    (MNode|CNode) The CN or MN you want to find a token for.

    (MNode/CNode) The Member/Coordinating Node to query.

    Value

    -

    (boolean)

    +

    (logical)

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +is_token_set(mn)
    +# }
    @@ -113,11 +165,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/mdq_run.html b/docs/reference/mdq_run.html new file mode 100644 index 0000000..8d29372 --- /dev/null +++ b/docs/reference/mdq_run.html @@ -0,0 +1,187 @@ + + + + + + + + +Score a metadata document against a MetaDIG suite — mdq_run • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function scores a metadata document against a MetaDIG suite. +The default suite is for the Arctic Data Center.

    + +
    + +
    mdq_run(document, suite_id = "arctic.data.center.suite.1")
    + +

    Arguments

    + + + + + + + + + + +
    document

    (eml/character) Either an EML object or path to a file on disk.

    suite_id

    (character) Specify a suite ID. Should be one of https://quality.nceas.ucsb.edu/quality/suites.

    + +

    Value

    + +

    (data.frame) A sorted data.frame of check results.

    + + +

    Examples

    +
    # NOT RUN {
    +# Check an EML document you are authoring
    +library(EML)
    +mdq_run(new("eml"))
    +
    +# Check an EML document that is saved to disk
    +mdq_run(system.file("examples", "example-eml-2.1.1.xml", package = "EML"))
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/new_uuid.html b/docs/reference/new_uuid.html index f90734b..4c11a79 100644 --- a/docs/reference/new_uuid.html +++ b/docs/reference/new_uuid.html @@ -6,7 +6,7 @@ -Helper function to generate a new UUID PID. — new_uuid • arcticdatautils +Generate a new UUID PID — new_uuid • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,30 +109,38 @@ -
    +
    +
    -

    Helper function to generate a new UUID PID.

    +

    Generate a new UUID PID.

    +
    -
    new_uuid()
    +
    new_uuid()

    Value

    (character) A new UUID PID.

    +

    Examples

    +
    id <- new_uuid()
    @@ -102,11 +151,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/object_exists.html b/docs/reference/object_exists.html index 59b5b2d..7dc52db 100644 --- a/docs/reference/object_exists.html +++ b/docs/reference/object_exists.html @@ -6,7 +6,7 @@ -Check if an object exists on a Member Node. — object_exists • arcticdatautils +Check if an object exists on a Member Node — object_exists • arcticdatautils @@ -18,14 +18,28 @@ + + + + + - + + + + + + + + + - + @@ -68,29 +110,33 @@ -
    +
    +

    This is a simple check for the HTTP status of a /meta/PID call on the -provided member node.

    +provided Member Node.

    +
    -
    object_exists(node, pids)
    +
    object_exists(node, pids)
    -

    Arguments

    +

    Arguments

    - + - - + +
    node

    (MNode|CNode) The Node to query.

    (MNode) The Member Node to query.

    pid

    (character) PID to check the existence of.

    pids

    (character) The PID(s) to check the existence of.

    @@ -99,6 +145,16 @@

    Value

    (logical) Whether the object exists.

    +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +
    +object_exists(mn, pids)
    +# }
    @@ -117,11 +175,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/parse_resource_map.html b/docs/reference/parse_resource_map.html index 7c80498..1b210d3 100644 --- a/docs/reference/parse_resource_map.html +++ b/docs/reference/parse_resource_map.html @@ -6,7 +6,7 @@ -Parse a Resource Map into a data.frame — parse_resource_map • arcticdatautils +Parse a resource map into a data.frame — parse_resource_map • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,32 +109,48 @@ -
    +
    +
    -

    Parse a Resource Map into a data.frame

    +

    Parse a resource map into a data.frame.

    +
    -
    parse_resource_map(path)
    +
    parse_resource_map(path)
    -

    Arguments

    +

    Arguments

    - +
    path

    (character) Path to the resource map (an RDF/XML file)

    (character) Path to the resource map (an RDF/XML file).

    Value

    -

    (data.frame) The statements in the Resource Map

    +

    (data.frame) The statements in the resource map.

    +

    Examples

    +
    # NOT RUN {
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +rm_pid <- "resource_map_urn:uuid:6b2e5753-4a94-4e6f-971c-36420a446ecb"
    +
    +# Write resource map to file
    +writeBin(getObject(mn, rm_pid), "~/Documents/resource_map.rdf")
    +df <- parse_resource_map("~/Documents/resource_map.rdf")
    +# }
    @@ -112,11 +171,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/pid_to_eml_entity.html b/docs/reference/pid_to_eml_entity.html new file mode 100644 index 0000000..022e66b --- /dev/null +++ b/docs/reference/pid_to_eml_entity.html @@ -0,0 +1,194 @@ + + + + + + + + +Create EML entity from a DataONE PID — pid_to_eml_entity • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Create EML entity from a DataONE PID

    + +
    + +
    pid_to_eml_entity(mn, pid, entityType = "otherEntity", ...)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + +
    mn

    (MNode) Member Node where the PID is associated with an object.

    pid

    (character) The PID of the object to create the sub-tree for.

    entityType

    (character) What kind of objects to create from the input. One of "dataTable", +"spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity".

    ...

    (optional) Additional arguments to be passed to new(entityType, ...).

    + +

    Value

    + +

    (list) The entity object.

    + + +

    Examples

    +
    # NOT RUN {
    +# Generate EML otherEntity
    +pid_to_eml_entity(mn,
    +                  pid,
    +                  entityType = "otherEntity",
    +                  entityName = "Entity Name",
    +                  entityDescription = "Description about entity")
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/pid_to_eml_physical.html b/docs/reference/pid_to_eml_physical.html new file mode 100644 index 0000000..938d4ef --- /dev/null +++ b/docs/reference/pid_to_eml_physical.html @@ -0,0 +1,184 @@ + + + + + + + + +Create EML physical objects for the given set of PIDs — pid_to_eml_physical • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This is a wrapper around sysmeta_to_eml_physical() which handles the task of +creating the EML physical.

    + +
    + +
    pid_to_eml_physical(mn, pids)
    + +

    Arguments

    + + + + + + + + + + +
    mn

    (MNode) Member Node where the PID is associated with an object.

    pids

    (character) The PID of the object to create the sub-tree for.

    + +

    Value

    + +

    (list) A list of otherEntity object(s).

    + + +

    Examples

    +
    # NOT RUN {
    +# Generate EML physical objects for all the data in a package
    +pkg <- get_package(mn, pid)
    +pid_to_eml_physical(mn, pkg$data)
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/publish_object.html b/docs/reference/publish_object.html index b379574..281acf8 100644 --- a/docs/reference/publish_object.html +++ b/docs/reference/publish_object.html @@ -6,7 +6,7 @@ -editing.R — publish_object • arcticdatautils +Publish an object on a Member Node — publish_object • arcticdatautils @@ -18,14 +18,31 @@ + + + + + - + + + + + + + + + - + @@ -68,21 +113,28 @@ -
    +
    +
    -

    High-level functions for managing content. -Publish an object on a member node

    +

    Use sensible defaults to publish an object on a Member Node. If identifier is provided, +use it, otherwise generate a UUID. If clone_pid is provided, then retrieve the +system metadata for that identifier and use it to provide rightsHolder, accessPolicy, +and replicationPolicy metadata. Note that this function only uploads the object to +the Member Node, and does not add it to a data package, which can be done separately.

    +
    -
    publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL,
    +    
    publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL,
       clone_pid = NULL, public = TRUE)
    -

    Arguments

    +

    Arguments

    @@ -91,11 +143,13 @@

    Ar

    - + - + @@ -107,27 +161,36 @@

    Ar

    - + + + + +
    path

    the path to the file to be published

    (character) The path to the file to be published.

    format_id

    (character) Optional. The format ID to set for the object. When not set, guess_format_id will be used to guess the format ID. Should be a DataONE format ID.

    (character) Optional. The format ID to set for the object. +When not set, guess_format_id() will be used to guess the format ID. +Should be a DataONE format ID.

    pid
    clone_pid

    (character) PID of objet to clone System Metadata from

    (character) PID of object to clone System Metadata from.

    public

    (logical) Whether object should be given public read access.

    -

    Details

    +

    Value

    -

    Use sensible defaults to publish an object on a member node. If identifier is provided, -use it, otherwise generate a UUID. If clone_id is provided, then retrieve the -system metadata for that identifier and use it to provide rightsHolder, accessPolicy, -and replicationPolicy metadata. Note that this function only uploads the object to -the Member Node, and does not add it to a data package, which can be done separately.

    +

    pid (character) The PID of the published object.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +my_path <- "/home/Documents/myfile.csv"
    +pid <- publish_object(mn, path = my_path, format_id = "text/csv", public = FALSE)
    +# }
    @@ -138,11 +201,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/publish_update.html b/docs/reference/publish_update.html index c4e08aa..20e07d9 100644 --- a/docs/reference/publish_update.html +++ b/docs/reference/publish_update.html @@ -6,7 +6,7 @@ -Publish an updated data package. — publish_update • arcticdatautils +Publish an updated data package — publish_update • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,23 +109,27 @@ -
    +
    +
    -

    This function can be used for a variety of tasks:

    +

    Publish an update to a data package after updating data files or metadata.

    +
    -
    publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL,
    +    
    publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL,
       child_pids = NULL, metadata_path = NULL, identifier = NULL,
    -  use_doi = FALSE, parent_resmap_pid = NULL, parent_metadata_pid = NULL,
    -  parent_data_pids = NULL, parent_child_pids = NULL, public = TRUE,
    -  check_first = TRUE)
    + use_doi = FALSE, parent_resmap_pid = NULL, + parent_metadata_pid = NULL, parent_data_pids = NULL, + parent_child_pids = NULL, public = TRUE, check_first = TRUE)
    -

    Arguments

    +

    Arguments

    @@ -109,7 +154,8 @@

    Ar

    - + @@ -121,39 +167,45 @@

    Ar

    - + - + - + - + - - - - - - +
    metadata_path

    (character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used.

    (character or eml) Optional. An eml class object or a path to a metadata file to update with. +If this is not set, the existing metadata document will be used.

    identifier
    parent_resmap_pid

    (character) Optional. PID of a parent package to be updated.

    (character) Optional. PID of a parent package to be updated. +Not optional if a parent package exists.

    parent_metadata_pid

    (character) Optional. Identifier for the metadata document of the parent package.

    (character) Optional. Identifier for the metadata document of the parent package. +Not optional if a parent package exists.

    parent_data_pids

    (character) Optional. Identifier for the data objects of the parent package.

    (character) Optional. Identifier for the data objects of the parent package. +Not optional if the parent package contains data objects.

    parent_child_pids

    (character) Optional. Resource map identifier(s) of child packages in the parent package.

    (character) Optional. Resource map identifier(s) of child packages in the parent package. +resource_map_pid should not be included. Not optional if the parent package contains other child packages.

    public

    (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). +

    (logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). This applies to the new metadata PID and its resource map; data object access policies are not affected.

    check_first

    (logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. This speeds up the function, especially when `data_pids` has many elements.

    parent_data_pids

    (logical) Optional. Whether to check the PIDs passed in as arguments exist on the MN before continuing. +Checks that objects exist and are of the right format type. This speeds up the function, especially when data_pids has many elements.

    +

    Value

    + +

    (character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects.

    +

    Details

    -
      +

      This function can be used for a variety of tasks:

      +
      • Publish an existing package with a DOI

      • Update a package with new data objects

      • Update a package with new metadata

      • @@ -161,23 +213,41 @@

        Details

        The metadata_pid and resource_map_pid provide the identifier of an EML metadata document and associated resource map, and the data_pids vector provides a list of PIDs of data objects in the package. Update the metadata file and resource map -by generating a new identifier (a DOI if use_doi is TRUE) and updating the Member +by generating a new identifier (a DOI if use_doi = TRUE) and updating the Member Node with a public version of the object. If metadata_path is not missing, it should be an edited version of the metadata to be used to update the original. If parent_resmap_pid is not missing, it indicates the PID of a parent package that -should be updated as well, using the parent_medata_pid, parent_data_pids, and +should be updated as well, using the parent_metadata_pid, parent_data_pids, and parent_child_pids as members of the updated package. In all cases, the objects are made publicly readable.

        +

        Examples

        +
        # NOT RUN {
        +cn <- CNode("STAGING2")
        +mn <- getMNode(cn,"urn:node:mnTestKNB")
        +
        +rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
        +meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
        +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
        +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
        +
        +meta_path <- "/home/Documents/myMetadata.xml"
        +
        +publish_update(mn, meta_pid, rm_pid, data_pids, meta_path, public = TRUE)
        +# }
    @@ -188,11 +258,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/remove_public_read.html b/docs/reference/remove_public_read.html index 8024207..03f549c 100644 --- a/docs/reference/remove_public_read.html +++ b/docs/reference/remove_public_read.html @@ -6,7 +6,7 @@ -Remove public access on a set of objects. — remove_public_read • arcticdatautils +Remove public read access for an object — remove_public_read • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,38 +109,52 @@ -
    +
    +
    -

    Remove public access on a set of objects.

    +

    Remove public read access for an object.

    +
    -
    remove_public_read(mn, pids)
    +
    remove_public_read(mn, pids)
    -

    Arguments

    +

    Arguments

    - + - +
    mn

    (MNode)

    (MNode) The Member Node.

    pids

    (character) A vector of PIDs to set public access on

    (character) The PIDs of the objects to remove public read access for.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +remove_public_read(mn, pids)
    +# }
    @@ -110,11 +165,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_abstract.html b/docs/reference/set_abstract.html index 188cbf6..2ad556c 100644 --- a/docs/reference/set_abstract.html +++ b/docs/reference/set_abstract.html @@ -6,7 +6,7 @@ -Set the abstract on an EML document — set_abstract • arcticdatautils +Set the abstract for an EML document — set_abstract • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,29 +109,33 @@ -
    +
    +
    -

    Set the abstract on an EML document

    +

    Set the abstract for an EML document.

    +
    -
    set_abstract(doc, text)
    +
    set_abstract(doc, text)
    -

    Arguments

    +

    Arguments

    - + @@ -98,11 +143,22 @@

    Ar

    Value

    -

    (eml) The modified EML document

    +

    (eml) The modified EML document.

    Examples

    -
    set_abstract(doc, c("Test abstract..."))
    #> Error in is(doc, "eml"): object 'doc' not found
    set_abstract(doc, c("First para", "second para"))
    #> Error in is(doc, "eml"): object 'doc' not found
    +
    # Create a new EML document +library(EML) +doc <- new("eml") + +# Set an abstract with a single paragraph +set_abstract(doc, c("Test abstract..."))
    #> <eml packageId="d187f1c6-9241-420d-904d-16608c0f5402" system="uuid" xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 eml.xsd"> +#> <dataset> +#> <abstract>hi</abstract> +#> </dataset> +#> </eml>
    +# Or one with multiple paragraphs +set_abstract(doc, c("First para...", "second para..."))
    #> <eml packageId="eb9651d3-575e-4b61-a577-0b8c63a53f4f" system="uuid" xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 eml.xsd"/>
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_access.html b/docs/reference/set_access.html index 780ea11..0d36aac 100644 --- a/docs/reference/set_access.html +++ b/docs/reference/set_access.html @@ -6,7 +6,7 @@ -Set the access policy for a set of objects. — set_access • arcticdatautils +Set the access policy for an object — set_access • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,21 +111,26 @@ -
    +
    +
    -

    For each permission, this function checks if the permission is already set -and moves on. System Metadata are only updated when a change was needed.

    +

    Set the access policy for the given subjects for the given objects on the given Member Node. +For each type of permission, this function checks if the permission is already set +and only updates the System Metadata when a change is needed.

    +
    -
    set_access(mn, pids, subjects, permissions = c("read", "write",
    +    
    set_access(mn, pids, subjects, permissions = c("read", "write",
       "changePermission"))
    -

    Arguments

    +

    Arguments

    doc

    (eml) An EML document

    (eml) An EML document.

    text

    (character) The abstract text. If text is length one, an -abstract without <para> or section elements will be created. +abstract without <para> or <section> elements will be created. If text is greater than one in length, para elements will be used for each element.

    @@ -91,23 +139,33 @@

    Ar

    - + - + - +
    pids

    (character) The object(s) to set the permissions on.

    (character) The PIDs of the objects to set permissions for.

    subjects

    (character) The subject(s) to set permissions for.

    (character) The identifiers of the subjects to set permissions for, typically an ORCID or DN.

    permissions

    (character) Optional. Vector of permissions.

    (character) Optional. The permissions to set. Defaults to +read, write, and changePermission.

    Value

    -

    (logical) Named

    +

    (logical) Whether an update was needed.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +   "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX",
    +   permissions = c("read", "write", "changePermission"))
    +# }
    @@ -126,11 +186,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_file_name.html b/docs/reference/set_file_name.html index e20ff77..fbf82a8 100644 --- a/docs/reference/set_file_name.html +++ b/docs/reference/set_file_name.html @@ -6,7 +6,7 @@ -Set the file name on an object — set_file_name • arcticdatautils +Set the file name for an object — set_file_name • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,19 +109,23 @@ -
    +
    +
    -

    Set the file name on an object

    +

    Set the file name for an object.

    +
    -
    set_file_name(mn, pid, name)
    +
    set_file_name(mn, pid, name)
    -

    Arguments

    +

    Arguments

    @@ -99,9 +144,17 @@

    Ar

    Value

    -

    (logical) Whether the update succeeded, FALSE means there was an error.

    +

    (logical) Whether the update succeeded.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn, "urn:node:mnTestKNB")
    +
    +pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
    +set_file_name(mn, pid, "myfile.csv")
    +# }
    @@ -120,11 +175,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_public_read.html b/docs/reference/set_public_read.html index dec18e8..e66ae45 100644 --- a/docs/reference/set_public_read.html +++ b/docs/reference/set_public_read.html @@ -6,7 +6,7 @@ -Set public access on a set of objects. — set_public_read • arcticdatautils +Set public read access for an object — set_public_read • arcticdatautils @@ -18,14 +18,27 @@ + + + + + - + + + + + + + + + - + @@ -68,38 +109,58 @@ -
    +
    +
    -

    Set public access on a set of objects.

    +

    Set public read access for an object.

    +
    -
    set_public_read(mn, pids)
    +
    set_public_read(mn, pids)
    -

    Arguments

    +

    Arguments

    - + - +
    mn

    (MNode)

    (MNode) The Member Node.

    pids

    (character) A vector of PIDs to set public access on

    (character) The PIDs of the objects to set public read access for.

    +

    Value

    +

    (logical) Whether an update was needed.

    + + +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +   "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +set_public_read(mn, pids)
    +# }
    @@ -110,11 +171,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_rights_and_access.html b/docs/reference/set_rights_and_access.html index 6657f5b..d026c73 100644 --- a/docs/reference/set_rights_and_access.html +++ b/docs/reference/set_rights_and_access.html @@ -6,8 +6,7 @@ -Set the given subject as the rightsHolder and subject with write and -changePermission access for the given PID. — set_rights_and_access • arcticdatautils +Set rights holder with access policy for an object — set_rights_and_access • arcticdatautils @@ -19,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -69,48 +111,61 @@ -
    +
    +
    -

    This function only updates the existing System Metadata if a change is -needed.

    +

    Set the given subject as the rights holder and with given permissions +for the given objects. This function only updates the existing +System Metadata when a change is needed.

    +
    -
    set_rights_and_access(mn, pids, subject, permissions = c("read", "write",
    +    
    set_rights_and_access(mn, pids, subject, permissions = c("read", "write",
       "changePermission"))
    -

    Arguments

    +

    Arguments

    - + - + - + -
    mn

    (MNode) The Member Node to send the query.

    (MNode) The Member Node.

    pids

    (character) The PID(s) to set the access rule for.

    (character) The PIDs of the objects to set the rights holder and access policy for.

    subject

    (character)The subject of the rule(s).

    (character) The identifier of the new rights holder, typically an ORCID or DN.

    permissions

    (character) The permissions for the rule. Defaults to +

    (character) Optional. The permissions to set. Defaults to read, write, and changePermission.

    Value

    -

    Whether an update was needed.

    +

    (logical) Whether an update was needed.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +    "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX",
    +    permissions = c("read", "write", "changePermission"))
    +# }
    @@ -129,11 +186,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/set_rights_holder.html b/docs/reference/set_rights_holder.html index 8ccc98e..8963e51 100644 --- a/docs/reference/set_rights_holder.html +++ b/docs/reference/set_rights_holder.html @@ -6,7 +6,7 @@ -access.R — set_rights_holder • arcticdatautils +Set the rights holder for an object — set_rights_holder • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,51 +111,64 @@ -
    +
    +
    -

    High-level utility functions for getting and setting access rules for DataONE -objects. -Set the rightsHolder field for a given PID.

    +

    Set the rights holder to the given subject for the given objects on the +given Member Node. This function checks if the rights holder is already set +and only updates the System Metadata when a change is needed.

    +
    -
    set_rights_holder(mn, pids, subject)
    +
    set_rights_holder(mn, pids, subject)
    -

    Arguments

    +

    Arguments

    - + - + - +
    mn

    (MNode) The MNode instance to be changed.

    (MNode) The Member Node.

    pids

    (character) The identifiers for the object to be changed.

    (character) The PIDs of the objects to set the rights holder for.

    subject

    (character) The identifier of the new rightsHolder, often an ORCID or DN.

    (character) The identifier of the new rights holder, typically an ORCID or DN.

    -

    Details

    +

    Value

    -

    Update the rights holder to the provided subject for the object identified in -the provided system metadata document on the given Member Node.

    +

    (logical) Whether an update was needed.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +set_rights_holder(mn, pids, subject = "http://orcid.org/0000-000X-XXXX-XXXX")
    +# }
    @@ -123,11 +179,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/show_indexing_status.html b/docs/reference/show_indexing_status.html new file mode 100644 index 0000000..3babec5 --- /dev/null +++ b/docs/reference/show_indexing_status.html @@ -0,0 +1,184 @@ + + + + + + + + +Show the indexing status of a set of PIDs — show_indexing_status • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Show the indexing status of a set of PIDs.

    + +
    + +
    show_indexing_status(mn, pids)
    + +

    Arguments

    + + + + + + + + + + +
    mn

    (MNode) The Member Node to query.

    pids

    (character/list) One or more PIDs.

    + +

    Value

    + +

    NULL

    + + +

    Examples

    +
    # NOT RUN {
    +# Create a package then check its indexing status
    +library(dataone)
    +mn <- MNode(...)
    +pkg <- create_dummy_package(mn)
    +show_indexing_status(mn, pkg)
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/sysmeta_to_eml_physical.html b/docs/reference/sysmeta_to_eml_physical.html index aff001b..2f7b3f6 100644 --- a/docs/reference/sysmeta_to_eml_physical.html +++ b/docs/reference/sysmeta_to_eml_physical.html @@ -6,7 +6,7 @@ -Create an EML physical subtree from a System Metadata instance — sysmeta_to_eml_physical • arcticdatautils +Create an EML physical object from system metadata — sysmeta_to_eml_physical • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,36 +111,57 @@ -
    +
    +
    -

    This function creates a pre-canned EML physical subtree from what's in the -System Metadata of an Object. Note that it sets an Online Distrubtion URL +

    This function creates a pre-canned EML physical object from what's in the +System Metadata of an object. Note that it sets an Online Distribution URL of the DataONE v2 resolve service for the PID.

    +
    -
    sysmeta_to_eml_physical(sysmeta)
    +
    sysmeta_to_eml_physical(sysmeta)
    -

    Arguments

    +

    Arguments

    - +
    sysmeta

    (SystemMetadata) The System Metadata of the object.

    (SystemMetadata) One or more System Metadata objects.

    +

    Value

    +

    (list) A list of physical objects for each sysmeta.

    + + +

    Examples

    +
    # NOT RUN {
    +# Generate EML physical objects for all the data in a package
    +pkg <- get_package(mn, pid)
    +sm <- lapply(pkg$data, function(pid) {
    +  getSystemMetadata(mn, pid)
    +})
    +sysmeta_to_eml_physical(sm)
    +# }
    @@ -108,11 +172,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/update_object.html b/docs/reference/update_object.html index 34b3048..20f511f 100644 --- a/docs/reference/update_object.html +++ b/docs/reference/update_object.html @@ -6,7 +6,7 @@ -Update an object with a new file. — update_object • arcticdatautils +Update an object with a new file — update_object • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,21 +111,26 @@ -
    +
    +
    -

    This is a convenience wrapper around `dataone::updateObject` which copies in +

    This is a convenience wrapper around dataone::updateObject() which copies in fields from the old object's System Metadata such as the rightsHolder and accessPolicy and updates only what needs to be changed.

    +
    -
    update_object(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL)
    +
    update_object(mn, pid, path, format_id = NULL, new_pid = NULL,
    +  sid = NULL)
    -

    Arguments

    +

    Arguments

    @@ -99,7 +147,18 @@

    Ar

    - + + + + + + + + +
    format_id

    (character) Optional. The format ID to set for the object. When not set, guess_format_id will be used to guess the format ID. Should be a DataONE format ID.

    (character) Optional. The format ID to set for the object. +When not set, guess_format_id() will be used to guess the format ID. +Should be a DataONE format ID.

    new_pid

    (character) Optional. Specify the PID for the new object. +Defaults to automatically generating a new, random UUID-style PID.

    sid

    (character) Optional. Specify a Series ID (SID) to use for the new object.

    @@ -108,6 +167,14 @@

    Value

    (character) The PID of the updated object.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
    +my_path <- "/home/Documents/myfile.csv"
    +new_pid <- update_object(mn, pid, my_path, format_id = "text/csv")
    +# }
    @@ -126,11 +195,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/update_package_object.html b/docs/reference/update_package_object.html new file mode 100644 index 0000000..29f3eeb --- /dev/null +++ b/docs/reference/update_package_object.html @@ -0,0 +1,232 @@ + + + + + + + + +Update a data object and associated resource map and metadata — update_package_object • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function updates a data object and then automatically +updates the package resource map with the new data PID. If an object +already has a dataTable, otherEntity, or spatialVector +with a working physical section, the EML will be updated with the new physical. +It is a convenience wrapper around update_object() and publish_update().

    + +
    + +
    update_package_object(mn, data_pid, new_data_path, resource_map_pid,
    +  format_id = NULL, public = TRUE, use_doi = FALSE, ...)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    mn

    (MNode) The Member Node of the data package.

    data_pid

    (character) PID for data object to update.

    new_data_path

    (character) Path to new data object.

    resource_map_pid

    (character) PID for resource map to update.

    format_id

    (character) Optional. The format ID to set for the object. +When not set, guess_format_id() will be used +to guess the format ID. Should be a DataONE format ID.

    public

    (logical) Optional. Make the update public. If FALSE, +will set the metadata and resource map to private (but not the data objects). +This applies to the new metadata PID and its resource map and data object. +Access policies are not affected.

    use_doi

    (logical) Optional. If TRUE, a new DOI will be minted.

    ...

    Other arguments to pass into publish_update().

    + +

    Value

    + +

    (character) Named character vector of PIDs in the data package, including PIDs +for the metadata, resource map, and data objects.

    + +

    See also

    + + + + +

    Examples

    +
    # NOT RUN {
    +cnTest <- dataone::CNode("STAGING")
    +mnTest <- dataone::getMNode(cnTest,"urn:node:mnTestARCTIC")
    +
    +pkg <- create_dummy_package_full(mnTest, title = "My package")
    +
    +file.create("new_file.csv")
    +update_package_object(mnTest, pkg$data[1], "new_file.csv", pkg$resource_map, format_id = "text/csv")
    +file.remove("new_file.csv")
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/docs/reference/update_resource_map.html b/docs/reference/update_resource_map.html index 366fd1f..691e962 100644 --- a/docs/reference/update_resource_map.html +++ b/docs/reference/update_resource_map.html @@ -6,7 +6,7 @@ -Update an existing resource map Object on a Member Node. — update_resource_map • arcticdatautils +Update an existing resource map object on a Member Node — update_resource_map • arcticdatautils @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,69 +111,112 @@ -
    +
    +

    This function first generates a new resource map RDF/XML document locally and -then uses the dataone::updateObject function to update an Object on the +then uses the dataone::updateObject() function to update an object on the specified MN.

    +
    -
    update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL,
    +    
    update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL,
       child_pids = NULL, other_statements = NULL, identifier = NULL,
       public = FALSE, check_first = TRUE)
    -

    Arguments

    +

    Arguments

    + + + + + + + + + + + + + + + + + + + + - + - + - + +arguments exist on the MN before continuing. This speeds up the function, +especially when data_pids has many elements.

    mn

    (MNode) The Member Node.

    resource_map_pid

    (character) The PID of the resource map to be updated.

    metadata_pid

    (character) The PID of the metadata object to go in the package.

    data_pids

    (character) The PID(s) of the data objects to go in the package.

    child_pids

    (character) The resource map PIDs of the packages to be +nested under the package.

    other_statements

    (data.frame) Extra statements to add to the Resource Map.

    (data.frame) Extra statements to add to the resource map.

    identifier

    (character) Manually specify the identifier for the new metadata object.

    public

    Whether or not to make the new resource map public read -(logical)

    (logical) Whether or not to make the new resource map public read.

    check_first

    (logical) Optional. Whether to check the PIDs passed in as -aruments exist on the MN before continuing. This speeds up the function, -especially when `data_pids` has many elements.

    +

    Value

    + +

    (character) The PID of the updated resource map.

    +

    Details

    -

    If you only want to generate resource map RDF/XML, see -generate_resource_map.

    -

    This function also can be used to be used to add a new child packages to a -parent package. For exmaple, if you have:

    +

    If you only want to generate resource map RDF/XML, see generate_resource_map().

    +

    This function can also be used to add new child packages to a +parent package. For example, if you have:

    Parent A B

    -

    and want to add C as a sibling package to A and B, e.g.

    +

    and want to add C as a sibling package to A and B, e.g.:

    Parent A B C

    -

    you could use this function.

    -

    Note: This function currently replaces the rightsHolder on the Resource Map +

    then you could use this function.

    +

    Note: This function currently replaces the rightsHolder on the resource map temporarily to allow updating but sets it back to the rightsHolder that was in place before the update.

    +

    Examples

    +
    # NOT RUN {
    +cn <- CNode('STAGING2')
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
    +meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe"
    +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1",
    +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe")
    +
    +rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids)
    +# }
    @@ -141,11 +227,14 @@

    Contents

    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/view_profile.html b/docs/reference/view_profile.html index 9245607..d542095 100644 --- a/docs/reference/view_profile.html +++ b/docs/reference/view_profile.html @@ -18,14 +18,29 @@ + + + + + - + + + + + + + + + - + @@ -68,23 +111,25 @@ -
    +
    +
    -

    This function is intended to be (poorly) simulate what a user sees when they +

    This function is intended to (poorly) simulate what a user sees when they browse to their "My Data Sets" page (their #profile URL). It uses a similar -Solr to what Metacat UI uses to generate the list. The results of this -function may be the same as what's on the #profile page but may be missing -some of the user's datasets when:

    +Solr query to what Metacat UI uses to generate the list.

    +
    -
    view_profile(mn, subject, fields = c("identifier", "title"))
    +
    view_profile(mn, subject, fields = c("identifier", "title"))
    -

    Arguments

    +

    Arguments

    @@ -94,7 +139,7 @@

    Ar

    +likely going to be an ORCID, e.g. http://orcid.org....

    @@ -104,28 +149,39 @@

    Ar

    Value

    -

    (data.frame) data.frame with the results.

    +

    (data.frame) A data.frame with the results.

    Details

    -

    - The user has any datasets in their #profile that the person running the +

    The results of this function may be the same as what's on the #profile page +but may be missing some of the user's datasets when:

      +
    • The user has any datasets in their #profile that the person running the query (you) can't read. This is rare on arcticdata.io but possible because arctic-data-admins usually has read/write/changePermission -permissions on every object. -- The user has datasets owned by an Equivalent Identity of the subject -being queried. This is rare, especially on arcticdata.io.

      +permissions on every object.

    • +
    • The user has datasets owned by an Equivalent Identity of the subject +being queried. This is rare, especially on arcticdata.io.

    • +

    Examples

    -
    ## Not run: ------------------------------------ -# options(...set...your...token....) -# mn <- env_load('production')$mn -# me <- get_token_subject() -# profile(mn, me) -# -# // Get a custom set of fields -# view_profile(mn, me, "origin") -## ---------------------------------------------
    +
    # NOT RUN {
    +options(...set...your...token....)
    +mn <- env_load('production')$mn
    +me <- get_token_subject()
    +view_profile(mn, me)
    +
    +# Get a custom set of fields
    +view_profile(mn, me, "origin")
    +
    +# Set environment
    +cn <- CNode("STAGING2")
    +mn <- getMNode(cn,"urn:node:mnTestKNB")
    +
    +package_df <- view_profile(mn, "http://orcid.org/0000-0003-4703-1974", fields = c("title"))
    +
    +# }
    +
    -

    Site built with pkgdown.

    +

    Site built with pkgdown.

    + + + diff --git a/docs/reference/which_in_eml.html b/docs/reference/which_in_eml.html new file mode 100644 index 0000000..a6b1ab4 --- /dev/null +++ b/docs/reference/which_in_eml.html @@ -0,0 +1,199 @@ + + + + + + + + +Search through EMLs — which_in_eml • arcticdatautils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    This function returns indices within an EML list that contain an instance where +test == TRUE. See examples for more information.

    + +
    + +
    which_in_eml(eml_list, element, test)
    + +

    Arguments

    +
    subject

    (character) The subject to find the datasets for. This is -likely going to be your ORCID, e.g. http://orcid.org....

    fields
    + + + + + + + + + + + + + +
    eml_list

    (S4/List) An EML list object.

    element

    (character) Element to evaluate.

    test

    (function/character) A function to evaluate (see examples). If test is a character, +will evaluate if element == test (see example 1).

    + + +

    Examples

    +
    # NOT RUN {
    +# Question: Which creators have a surName "Smith"?
    +n <- which_in_eml(eml@dataset@creator, "surName", "Smith")
    +# Answer: eml@dataset@creator[n]
    +
    +# Question: Which dataTables have an entityName that begins with "2016"?
    +n <- which_in_eml(eml@dataset@dataTable, "entityName", function(x) {grepl("^2016", x)})
    +# Answer: eml@dataset@dataTable[n]
    +
    +# Question: Which attributes in dataTable[[1]] have a numberType "natural"?
    +n <- which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "numberType", "natural")
    +# Answer: eml@dataset@dataTable[[1]]@attributeList@attribute[n]
    +
    +# Question: Which dataTables have at least one attribute with a numberType "natural"?
    +n <- which_in_eml(eml@dataset@dataTable, "numberType", function(x) {"natural" %in% x})
    +# Answer: eml@dataset@dataTable[n]
    +# }
    +
    + +
    + +
    + + +
    +

    Site built with pkgdown.

    +
    + +
    +
    + + + + + + diff --git a/index.md b/index.md new file mode 100644 index 0000000..f6f90e1 --- /dev/null +++ b/index.md @@ -0,0 +1,24 @@ +# arcticdatautils + +[![Travis build status](https://travis-ci.org/NCEAS/arcticdatautils.svg?branch=master)](https://travis-ci.org/NCEAS/arcticdatautils) + +The `arcticdatautils` package contains code for doing lots of useful stuff that's too specific for the [dataone](https://github.com/DataONEorg/rdataone) package: + +- Inserting large numbers of files into a Metacat Member Node +- High-level [dataone](https://github.com/DataONEorg/rdataone) wrappers for working with Objects and Data Packages that streamline Arctic Data Center operations + +Note: The package is intended to be used by NCEAS staff and may not make much sense to others. + +## Installing + +We recommend installing from the latest [release](https://github.com/NCEAS/arcticdatautils/releases) (aka tag) instead of from `master`. Install the latest release with the [`remotes`](https://github.com/r-lib/remotes) package: + +```r +remotes::install_github("nceas/arcticdatautils@*release") +``` + +If you're feeling adventurous, you can install from the bleeding edge: + +```r +remotes::install_github("nceas/arcticdatautils") +``` diff --git a/inst/example-eml-220.xml b/inst/example-eml-220.xml new file mode 100644 index 0000000..5964b54 --- /dev/null +++ b/inst/example-eml-220.xml @@ -0,0 +1,86 @@ + + + some-alternate-identifier-string + arcticdata R package test + + + Test + User + + + + testuser + principtalInvestigator + + 2013-12-16 + eng + + Just an abstract. + + + SOME_KEY_WORD + + + This work is licensed under the Creative Commons Attribution 4.0 International License.To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/. + + + + No geographic description provided. 
+ + -135 + -134 + 59 + 57 + + + + + + 2011-06-15 + + + + 2013-05-31 + + + + + + + testuser + + + A Test Project Title + + + Bryce + Mecum + + principalInvestigator + + An abstract. + + NSF Award XXXXXX + + + + NA + + NA + 27 + 5179254207aed0f39ded1add3d9bab3ea0e10b084c2c194ccf0a033b8f5e7789 + + + application/octet-stream + + + + + ecogrid://knb/urn:uuid:89bec5d0-26db-48ac-ae54-e1b4c999c456 + + + + Other + + + diff --git a/inst/example-eml-full.xml b/inst/example-eml-full.xml new file mode 100644 index 0000000..b1e1859 --- /dev/null +++ b/inst/example-eml-full.xml @@ -0,0 +1,182 @@ + + + + A Dummy Package + + + Henrietta + High-Stakes + + Deadwood Saloon + Owner +
    + Wild West + Deadwood + CA +
    + (123) 456 - 7890 + dummy@dummy.com + http://orcid.org/XXXX-XXXX-XXXX-XXXX +
    + + + Henrietta + High-Stakes + + Deadwood Saloon + Owner +
    + Wild West + Deadwood + CA +
    + (123) 456 - 7890 + dummy@dummy.com + http://orcid.org/XXXX-XXXX-XXXX-XXXX +
    + + + Harry + High-Stakes + + Deadwood Saloon + Co-Owner + + + + Henrietta + High-Stakes + + Deadwood Saloon + Owner +
    + Wild West + Deadwood + CA +
    + (123) 456 - 7890 + dummy@dummy.com + http://orcid.org/XXXX-XXXX-XXXX-XXXX + principalInvestigator +
    + 2018 + + This is an abstract. + + + This work is dedicated to the public domain under the Creative Commons Universal 1.0 Public Domain Dedication. To view a copy of this dedication, visit https://creativecommons.org/publicdomain/zero/1.0/. + + + + Somewhere in the world + + 70 + 70 + 65 + 65 + + + + Another place in the world + + 80 + 80 + 75 + 75 + + + + + 2018 + + + + Everything was identified to species + + Species + Homo sapiens + + + Species + Canis lupus + + + + + + Henrietta + High-Stakes + + Deadwood Saloon + Owner +
    + Wild West + Deadwood + CA +
    + (123) 456 - 7890 + dummy@dummy.com + http://orcid.org/XXXX-XXXX-XXXX-XXXX +
    + + + Banker + Bob + + Deadwood Bank + Owner + + + + Jesse + Wales + + Deadwood Streets + Outlaw + + + + Elizabeth + Money + + Deadwood Land Investing + Land Investor + + + + + Some methods + + + + + Some more methods + + + + + + Lots of sampling + + + + More sampling + + + + + A Dummy Package + + + Harry + High-Stakes + + principalInvestigator + + + 1234 + 4567 + + +
    +
    diff --git a/man/add_access_rules.Rd b/man/add_access_rules.Rd deleted file mode 100644 index 1823d62..0000000 --- a/man/add_access_rules.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{add_access_rules} -\alias{add_access_rules} -\title{sysmeta.R} -\usage{ -add_access_rules(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The SystemMetadata to add rules to.} -} -\value{ -The modified SystemMetadata object -} -\description{ -Utility functions for modifying System Metadata objects. -Add access rules to the sysmeta object -} -\details{ -This is a function because I add a set of standard set of access rules to -every object and the access rules don't differ across objects. -} diff --git a/man/add_additional_identifiers.Rd b/man/add_additional_identifiers.Rd deleted file mode 100644 index 56d2b65..0000000 --- a/man/add_additional_identifiers.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{add_additional_identifiers} -\alias{add_additional_identifiers} -\title{Add a set of additional identifiers to an EML document.} -\usage{ -add_additional_identifiers(path, identifiers) -} -\arguments{ -\item{path}{(character) Path to the EML document.} - -\item{identifiers}{(character) Set of identifiers to add.} -} -\value{ -(character) Path to the modified document. -} -\description{ -Add a set of additional identifiers to an EML document. 
-} diff --git a/man/add_admin_group_access.Rd b/man/add_admin_group_access.Rd deleted file mode 100644 index 767af4c..0000000 --- a/man/add_admin_group_access.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{add_admin_group_access} -\alias{add_admin_group_access} -\title{Adds access to the given System Metadata for the arctic-data-admins group} -\usage{ -add_admin_group_access(sysmeta) -} -\arguments{ -\item{sysmeta}{} -} -\description{ -Adds access to the given System Metadata for the arctic-data-admins group -} diff --git a/man/add_dummy_prov.Rd b/man/add_dummy_prov.Rd new file mode 100644 index 0000000..d1d39c8 --- /dev/null +++ b/man/add_dummy_prov.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{add_dummy_prov} +\alias{add_dummy_prov} +\title{Add prov to a dummy package} +\usage{ +add_dummy_prov(mn, rm_pid) +} +\arguments{ +\item{mn}{member node (the ADC test node)} + +\item{rm_pid}{resource map identifier} +} +\description{ +Adds provenance information to a dummy package for testing +} diff --git a/man/add_methods_step.Rd b/man/add_methods_step.Rd deleted file mode 100644 index 8b3c022..0000000 --- a/man/add_methods_step.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{add_methods_step} -\alias{add_methods_step} -\title{Adds a step to the methods document} -\usage{ -add_methods_step(doc, title, description) -} -\arguments{ -\item{doc}{(eml) The EML document to add the method step to.} - -\item{title}{(character) The title of the method step.} - -\item{description}{(character) The description of the method.} -} -\value{ -(eml) The modified EML document -} -\description{ -Adds a step to the methods document -} diff --git a/man/add_string_to_title.Rd b/man/add_string_to_title.Rd deleted file mode 100644 index 5279e23..0000000 --- 
a/man/add_string_to_title.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{add_string_to_title} -\alias{add_string_to_title} -\title{Adds a string to the title element in the given file.} -\usage{ -add_string_to_title(path, string) -} -\arguments{ -\item{path}{(character) Path to the XML file to edit.} - -\item{string}{(character) The new value.} -} -\description{ -Adds a string to the title element in the given file. -} diff --git a/man/arcticdatautils.Rd b/man/arcticdatautils.Rd new file mode 100644 index 0000000..6a356b8 --- /dev/null +++ b/man/arcticdatautils.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arcticdatautils.R +\docType{package} +\name{arcticdatautils} +\alias{arcticdatautils} +\alias{arcticdatautils-package} +\title{arcticdatautils: Utilities for the Arctic Data Center} +\description{ +This package contains code for doing lots of useful stuff that's too specific for the +dataone package, primarily functions that streamline Arctic Data Center operations. 
+} diff --git a/man/change_eml_name.Rd b/man/change_eml_name.Rd deleted file mode 100644 index 4458d8e..0000000 --- a/man/change_eml_name.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{change_eml_name} -\alias{change_eml_name} -\title{Utility function to extract a name string from an XML individualName node, -parse it into tokens,and reformat the individualName with new children nodes} -\usage{ -change_eml_name(party) -} -\arguments{ -\item{party}{the XML node containing a subclass of eml:ResponsibleParty} -} -\value{ -the modified XML node -} -\description{ -Utility function to extract a name string from an XML individualName node, -parse it into tokens,and reformat the individualName with new children nodes -} diff --git a/man/clear_methods.Rd b/man/clear_methods.Rd deleted file mode 100644 index c1a261c..0000000 --- a/man/clear_methods.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{clear_methods} -\alias{clear_methods} -\title{Clear all methods from the document.} -\usage{ -clear_methods(doc) -} -\arguments{ -\item{doc}{(eml) The document to clear methods from.} -} -\value{ -(eml) The modified document. -} -\description{ -Clear all methods from the document. -} diff --git a/man/clear_replication_policy.Rd b/man/clear_replication_policy.Rd deleted file mode 100644 index 9771474..0000000 --- a/man/clear_replication_policy.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{clear_replication_policy} -\alias{clear_replication_policy} -\title{Clear the replication policy from a System Metadata object} -\usage{ -clear_replication_policy(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The System Metadata object to clear the replication policy of.} -} -\value{ -(SystemMetadata) The modified System Metadata object. 
-} -\description{ -Clear the replication policy from a System Metadata object -} diff --git a/man/convert_iso_to_eml.Rd b/man/convert_iso_to_eml.Rd index 4939de6..a85eb5e 100644 --- a/man/convert_iso_to_eml.Rd +++ b/man/convert_iso_to_eml.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/util.R \name{convert_iso_to_eml} \alias{convert_iso_to_eml} -\title{Convert and ISO document to EML using an XSLT.} +\title{Convert an ISO document to EML using an XSLT} \usage{ convert_iso_to_eml(path, style = NA) } @@ -17,3 +17,9 @@ convert_iso_to_eml(path, style = NA) \description{ Leave style=NA if you want to use the default ISO-to-EML stylesheet. } +\examples{ +\dontrun{ +iso_path <- "~/Documents/ISO_metadata.xml" +eml_path <- convert_iso_to_eml(iso_path) +} +} diff --git a/man/create_dummy_attributes_dataframe.Rd b/man/create_dummy_attributes_dataframe.Rd new file mode 100644 index 0000000..466d477 --- /dev/null +++ b/man/create_dummy_attributes_dataframe.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{create_dummy_attributes_dataframe} +\alias{create_dummy_attributes_dataframe} +\title{Create test attributes data.frame} +\usage{ +create_dummy_attributes_dataframe(numberAttributes, factors = NULL) +} +\arguments{ +\item{numberAttributes}{(integer) Number of attributes to be created in the table.} + +\item{factors}{(character) Optional vector of factor names to include.} +} +\value{ +(data.frame) A data.frame of attributes. +} +\description{ +Create a test data.frame of attributes. 
+} +\examples{ +\dontrun{ +# Create dummy attribute dataframe with 6 attributes and 2 factors +attributes <- create_dummy_attributes_dataframe(6, c("Factor1", "Factor2")) +} +} diff --git a/man/create_dummy_enumeratedDomain_dataframe.Rd b/man/create_dummy_enumeratedDomain_dataframe.Rd new file mode 100644 index 0000000..c58ccb3 --- /dev/null +++ b/man/create_dummy_enumeratedDomain_dataframe.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{create_dummy_enumeratedDomain_dataframe} +\alias{create_dummy_enumeratedDomain_dataframe} +\title{Create test enumeratedDomain data.frame} +\usage{ +create_dummy_enumeratedDomain_dataframe(factors) +} +\arguments{ +\item{factors}{(character) Vector of factor names to include.} +} +\value{ +(data.frame) A data.frame of factors. +} +\description{ +Create a test data.frame of enumeratedDomains. +} +\examples{ +\dontrun{ +# Create dummy dataframe of 2 factors/enumerated domains +attributes <- create_dummy_enumeratedDomain_dataframe(c("Factor1", "Factor2")) +} +} diff --git a/man/create_dummy_metadata.Rd b/man/create_dummy_metadata.Rd index bd55313..4cebc97 100644 --- a/man/create_dummy_metadata.Rd +++ b/man/create_dummy_metadata.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/helpers.R \name{create_dummy_metadata} \alias{create_dummy_metadata} -\title{helpers.R} +\title{Create a test metadata object} \usage{ create_dummy_metadata(mn, data_pids = NULL) } @@ -11,7 +11,17 @@ create_dummy_metadata(mn, data_pids = NULL) \item{data_pids}{(character) Optional. PIDs for data objects the metadata documents.} } +\value{ +(character) The PID of the published metadata document. +} \description{ -Various helper functions for things like testing the package. -Create a test metadata object. +Create a test EML metadata object. 
+} +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- create_dummy_metadata(mn) +} } diff --git a/man/create_dummy_object.Rd b/man/create_dummy_object.Rd index 4e4ff7d..8083b7e 100644 --- a/man/create_dummy_object.Rd +++ b/man/create_dummy_object.Rd @@ -2,13 +2,25 @@ % Please edit documentation in R/helpers.R \name{create_dummy_object} \alias{create_dummy_object} -\title{Create a test object.} +\title{Create a test object} \usage{ create_dummy_object(mn) } \arguments{ \item{mn}{(MNode) The Member Node.} } +\value{ +(character) The PID of the dummy object. +} \description{ -Create a test object. +Create a test data object. +} +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") + +pid <- create_dummy_object(mn) +} } diff --git a/man/create_dummy_package.Rd b/man/create_dummy_package.Rd index 34cbf74..032eacb 100644 --- a/man/create_dummy_package.Rd +++ b/man/create_dummy_package.Rd @@ -2,15 +2,27 @@ % Please edit documentation in R/helpers.R \name{create_dummy_package} \alias{create_dummy_package} -\title{Create a test package.} +\title{Create a test package} \usage{ create_dummy_package(mn, size = 2) } \arguments{ \item{mn}{(MNode) The Member Node.} -\item{size}{(numeric) The number of files in the package.} +\item{size}{(numeric) The number of files in the package, including the metadata file.} +} +\value{ +(list) The PIDs for all elements in the data package. } \description{ -Create a test package. +Create a test data package. 
+} +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +#Create dummy package with 5 data objects and 1 metadata object +pids <- create_dummy_package(mn, 6) +} } diff --git a/man/create_dummy_package_full.Rd b/man/create_dummy_package_full.Rd new file mode 100644 index 0000000..c8bf203 --- /dev/null +++ b/man/create_dummy_package_full.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{create_dummy_package_full} +\alias{create_dummy_package_full} +\title{Create dummy package with fuller metadata} +\usage{ +create_dummy_package_full(mn, title = "A Dummy Package") +} +\arguments{ +\item{mn}{(MNode) The Member Node.} + +\item{title}{(character) Optional. Title of package. Defaults to "A Dummy Package".} +} +\value{ +(list) The PIDs for all elements in the data package. +} +\description{ +Creates a fuller package than \code{\link[=create_dummy_package]{create_dummy_package()}} +but is otherwise based on the same concept. This dummy +package includes multiple data objects, responsible parties, +geographic locations, method steps, etc. +} diff --git a/man/create_dummy_parent_package.Rd b/man/create_dummy_parent_package.Rd index e6202ef..7dd7ef2 100644 --- a/man/create_dummy_parent_package.Rd +++ b/man/create_dummy_parent_package.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/helpers.R \name{create_dummy_parent_package} \alias{create_dummy_parent_package} -\title{Create a test parent package.} +\title{Create a test parent package} \usage{ create_dummy_parent_package(mn, children) } @@ -11,6 +11,14 @@ create_dummy_parent_package(mn, children) \item{children}{(character) Child package (resource maps) PIDs.} } +\value{ +(list) The resource map PIDs for both the parent and child packages. +} \description{ -Create a test parent package. +Create a test parent data package. 
+} +\examples{ +\dontrun{ +# Set environment +} } diff --git a/man/create_from_folder.Rd b/man/create_from_folder.Rd deleted file mode 100644 index 4b7e933..0000000 --- a/man/create_from_folder.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inserting.R -\name{create_from_folder} -\alias{create_from_folder} -\title{inserting.R} -\usage{ -create_from_folder(mn, path, data_pids = NULL) -} -\arguments{ -\item{mn}{(MNode) The Member Node to create the packages on.} - -\item{path}{(character) The path to the folder containing the files.} - -\item{data_pids}{(character) Optional. Manually specify the PIDs of data. This is useful if data were inserted outside this function and you want to re-use those objects.} -} -\value{ -(list) All of the PIDs created. -} -\description{ -A set of utilities for inserting packages from files and folders on disk. -Create a package from a folder containing an ISO package (legacy) -} -\details{ -This function handles the process of inserting the original ISO package -and updating it with an EML package. - -Note: This only works for Gateway packages right now. -} diff --git a/man/create_object.Rd b/man/create_object.Rd deleted file mode 100644 index f36747a..0000000 --- a/man/create_object.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{create_object} -\alias{create_object} -\title{Create an object from a row of the inventory.} -\usage{ -create_object(file, sysmeta, base_path, mn) -} -\arguments{ -\item{file}{(data.frame)A row from the inventory.} - -\item{sysmeta}{(SystemMetadata) The file's sysmeta.} - -\item{base_path}{(character)} - -\item{mn}{(MNode)} -} -\description{ -Create an object from a row of the inventory. 
-} diff --git a/man/create_resource_map.Rd b/man/create_resource_map.Rd index 2240a32..ae8a8ac 100644 --- a/man/create_resource_map.Rd +++ b/man/create_resource_map.Rd @@ -2,36 +2,47 @@ % Please edit documentation in R/editing.R \name{create_resource_map} \alias{create_resource_map} -\title{Create a resource map Object on a Member Node.} +\title{Create a resource map object on a Member Node} \usage{ -create_resource_map(mn, metadata_pid, data_pids = NULL, child_pids = NULL, - check_first = TRUE) +create_resource_map(mn, metadata_pid, data_pids = NULL, + child_pids = NULL, check_first = TRUE, ...) } \arguments{ \item{mn}{(MNode) The Member Node} -\item{metadata_pid}{(character) The PID of the metadata object to go in the -package.} +\item{metadata_pid}{(character) The PID of the metadata object to go in the package.} -\item{data_pids}{(character) The PID(s) of the data objects to go in the -package.} +\item{data_pids}{(character) The PID(s) of the data objects to go in the package.} \item{child_pids}{(character) The resource map PIDs of the packages to be nested under the package.} \item{check_first}{(logical) Optional. Whether to check the PIDs passed in as -aruments exist on the MN before continuing. This speeds up the function, -especially when `data_pids` has many elements.} +arguments exist on the MN before continuing. This speeds up the function, +especially when \code{data_pids} has many elements.} + +\item{...}{Additional arguments that can be passed into \code{\link[=publish_object]{publish_object()}}.} } \value{ -(character) The created resource map's PID +(character) The PID of the created resource map. } \description{ This function first generates a new resource map RDF/XML document locally and -then uses the dataone::createObject function to create the Object on the +then uses the \code{\link[dataone:createObject]{dataone::createObject()}} function to create the object on the specified MN. 
} \details{ -If you only want to generate resource map RDF/XML, see -\code{\link{generate_resource_map}} +If you only want to generate resource map RDF/XML, see \code{\link[=generate_resource_map]{generate_resource_map()}}. +} +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") + +meta_pid <- 'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe' +dat_pid <- c('urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1', +'urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe') + +create_resource_map(mn, metadata_pid = meta_pid, data_pids = dat_pid) +} } diff --git a/man/create_sysmeta.Rd b/man/create_sysmeta.Rd deleted file mode 100644 index 5e00721..0000000 --- a/man/create_sysmeta.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{create_sysmeta} -\alias{create_sysmeta} -\title{Create a sysmeta object.} -\usage{ -create_sysmeta(file, base_path, submitter, rights_holder) -} -\arguments{ -\item{file}{(data.frame) A single row from the inventory.} - -\item{base_path}{(character) The path prefix to use with the contents of `file[1,"filename]` that -will be used to locate the file on disk.} - -\item{submitter}{(character) The submitter DN string for the object.} - -\item{rights_holder}{(character) The rights holder DN string for the object.} -} -\value{ -The sysmeta object (dataone::SystemMetadata) -} -\description{ -This is a wrapper function around the constructor for a -dataone::SystemMetadata object. 
-} diff --git a/man/determine_child_pids.Rd b/man/determine_child_pids.Rd deleted file mode 100644 index 75f1978..0000000 --- a/man/determine_child_pids.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{determine_child_pids} -\alias{determine_child_pids} -\title{Calculate a set of child PIDs for a given package.} -\usage{ -determine_child_pids(inventory, package) -} -\arguments{ -\item{package}{} -} -\description{ -Calculate a set of child PIDs for a given package. -} diff --git a/man/eml_abstract.Rd b/man/eml_abstract.Rd new file mode 100644 index 0000000..9d0b868 --- /dev/null +++ b/man/eml_abstract.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_abstract} +\alias{eml_abstract} +\title{Create an EML abstract} +\usage{ +eml_abstract(text) +} +\arguments{ +\item{text}{(character) Paragraphs of text with one paragraph per element in the +character vector, constructed using \code{list}} +} +\value{ +(abstract) An EML abstract. +} +\description{ +Create an EML abstract. +} +\details{ +Note that eml$abstract() provides the same functionality. 
+} +\examples{ +\dontrun{ +# Set an abstract with a single paragraph +eml_abstract("Test abstract...") + +# Or one with multiple paragraphs +eml_abstract(list("First para...", "second para...")) +} +} diff --git a/man/eml_add_entities.Rd b/man/eml_add_entities.Rd deleted file mode 100644 index 06091e5..0000000 --- a/man/eml_add_entities.Rd +++ /dev/null @@ -1,50 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{eml_add_entities} -\alias{eml_add_entities} -\title{Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table.} -\usage{ -eml_add_entities(doc, entities, - resolve_base = "https://cn.dataone.org/cn/v2/resolve/") -} -\arguments{ -\item{doc}{(eml) An EML document} - -\item{entities}{(data.frame) A data.frame with columns type, path, pid, and -format_id} - -\item{resolve_base}{(character) Optional. Specify a DataONE CN resolve base -URI which will be used for serializing download URLs into the EML. Most users - should not override the default value.} -} -\value{ -(eml) The modified EML document. -} -\description{ -Add new entity (otherEntity, dataTable, etc) elements to an EML document from a table. 
-} -\examples{ -# Create entities from files on disk -\dontrun{ - types <- c("dataTable") - paths <- list.files(., full.names = TRUE) # Get full paths to some files - pids <- vapply(paths, function(x) { paste0("urn:uuid:", uuid::UUIDgenerate()) }, "") # Generate some UUID PIDs - format_ids <- guess_format_id(paths) # Try to guess format IDs, you should check this afterwards - - entity_df <- data.frame(type = types, - path = paths, - pid = pids, - format_id = format_ids, - stringsAsFactors = FALSE) - - doc <- new("eml") - doc <- eml_add_entities(doc, entity_df) -} - -# Read in a CSV containing the info about files on disk -\dontrun{ - entity_df <- read.csv("./my_entities.csv", stringsAsFactors = FALSE) - doc <- new("eml") - doc <- eml_add_entities(doc, entity_df) -} -} diff --git a/man/eml_address.Rd b/man/eml_address.Rd index 7221f2c..5ef855a 100644 --- a/man/eml_address.Rd +++ b/man/eml_address.Rd @@ -2,22 +2,25 @@ % Please edit documentation in R/eml.R \name{eml_address} \alias{eml_address} -\title{Create an EML address element.} +\title{Create an EML address element} \usage{ eml_address(delivery_points, city, administrative_area, postal_code) } \arguments{ \item{delivery_points}{(character) One or more delivery points.} -\item{city}{(character) City} +\item{city}{(character) City.} -\item{administrative_area}{(character) Administrative area} +\item{administrative_area}{(character) Administrative area.} -\item{postal_code}{(character) Postal code} +\item{postal_code}{(character) Postal code.} } \value{ (address) An EML address object. } \description{ -Create an EML address element. +A simple way to create an EML address element. +} +\details{ +Note that EML::eml$address() provides the same functionality } diff --git a/man/eml_associated_party.Rd b/man/eml_associated_party.Rd index 8466cd4..99b15b4 100644 --- a/man/eml_associated_party.Rd +++ b/man/eml_associated_party.Rd @@ -7,13 +7,13 @@ eml_associated_party(...) 
} \arguments{ -\item{...}{Arguments passed on to eml_party} +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} } \value{ -(associatedParty) The new associatedParty +(associatedParty) The new associatedParty. } \description{ -See \code{\link{eml_party}} for details. +See \code{\link[=eml_party]{eml_party()}} for details. } \examples{ eml_associated_party("test", "user", email = "test@user.com", role = "Principal Investigator") diff --git a/man/eml_contact.Rd b/man/eml_contact.Rd index 7123874..7796621 100644 --- a/man/eml_contact.Rd +++ b/man/eml_contact.Rd @@ -7,14 +7,19 @@ eml_contact(...) } \arguments{ -\item{...}{Arguments passed on to eml_party} +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} } \value{ -(contact) The new contact +(contact) The new contact. } \description{ -See \code{\link{eml_party}} for details. +See \code{\link[=eml_party]{eml_party()}} for details. } \examples{ +\dontrun{ eml_contact("test", "user", email = "test@user.com") +eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), + c("Data Scientist", "Programmer")) +} } diff --git a/man/eml_creator.Rd b/man/eml_creator.Rd index bcde548..491d7df 100644 --- a/man/eml_creator.Rd +++ b/man/eml_creator.Rd @@ -7,14 +7,19 @@ eml_creator(...) } \arguments{ -\item{...}{Arguments passed on to eml_party} +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} } \value{ -(creator) The new creator +(creator) The new creator. } \description{ -See \code{\link{eml_party}} for details. +See \code{\link[=eml_party]{eml_party()}} for details. 
} \examples{ +\dontrun{ eml_creator("test", "user", email = "test@user.com") +eml_creator("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +eml_creator("creator", c("Dominic", "'Dom'"), "Mullen", c("NCEAS", "UCSB"), + c("Data Scientist", "Programmer")) +} } diff --git a/man/eml_geographic_coverage.Rd b/man/eml_geographic_coverage.Rd new file mode 100644 index 0000000..efe825f --- /dev/null +++ b/man/eml_geographic_coverage.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_geographic_coverage} +\alias{eml_geographic_coverage} +\title{Create an EML geographicCoverage section} +\usage{ +eml_geographic_coverage(description, north, east, south, west) +} +\arguments{ +\item{description}{(character) A textual description.} + +\item{north}{(numeric) North bounding coordinate.} + +\item{east}{(numeric) East bounding coordinate.} + +\item{south}{(numeric) South bounding coordinate.} + +\item{west}{(numeric) West bounding coordinate.} +} +\value{ +(geographicCoverage) The new geographicCoverage section. +} +\description{ +A simple way to create an EML geographicCoverage section. +} +\details{ +For a bounding box, all coordinates should be unique. +For a single point, the North and South bounding coordinates should be the same and +the East and West bounding coordinates should be the same. 
+ +Note that EML::set_coverage() provides the same (and more) functionality +} diff --git a/man/eml_get_simple.Rd b/man/eml_get_simple.Rd new file mode 100644 index 0000000..967effd --- /dev/null +++ b/man/eml_get_simple.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_get_simple} +\alias{eml_get_simple} +\title{Get a simple list output from EML::eml_get()} +\usage{ +eml_get_simple(doc, element) +} +\arguments{ +\item{doc}{(list) An EML object or child/descendant object} + +\item{element}{(character) Name of the element to be extracted. If +multiple occurrences are found, will extract all.} +} +\value{ +out (vector) A list of values contained in element given +} +\description{ +This function is a convenience wrapper around EML::eml_get() which +returns the output as a simple list as opposed to an object of type +\code{emld} by removing the attributes and context from the object. If an +element containing children is returned all of its children will be +flattened into a named character vector. This function is best used +to extract values from elements that have no children. 
+} +\examples{ +\dontrun{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') + +doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) + +datatable_names <- eml_get_simple(doc$dataset$dataTable, element = "entityName") +} + +} diff --git a/man/eml_individual_name.Rd b/man/eml_individual_name.Rd deleted file mode 100644 index 3d5ec2b..0000000 --- a/man/eml_individual_name.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{eml_individual_name} -\alias{eml_individual_name} -\title{Create an EML individualName section} -\usage{ -eml_individual_name(given_names = NULL, sur_name) -} -\arguments{ -\item{given_names}{(character) One or more given names.} - -\item{sur_name}{(character) A sur (last) name.} -} -\value{ -(individualName) The new individualName section -} -\description{ -Create an EML individualName section -} diff --git a/man/eml_metadata_provider.Rd b/man/eml_metadata_provider.Rd index e85a878..19ca392 100644 --- a/man/eml_metadata_provider.Rd +++ b/man/eml_metadata_provider.Rd @@ -7,13 +7,13 @@ eml_metadata_provider(...) } \arguments{ -\item{...}{Arguments passed on to eml_party} +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} } \value{ -(metadataProvider) The new metadataProvider +(metadataProvider) The new metadataProvider. } \description{ -See \code{\link{eml_party}} for details. +See \code{\link[=eml_party]{eml_party()}} for details. 
} \examples{ eml_metadata_provider("test", "user", email = "test@user.com") diff --git a/man/eml_nsf_to_project.Rd b/man/eml_nsf_to_project.Rd new file mode 100644 index 0000000..eb40f3a --- /dev/null +++ b/man/eml_nsf_to_project.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_nsf_to_project} +\alias{eml_nsf_to_project} +\title{Create an EML project section from a list of NSF award numbers} +\usage{ +eml_nsf_to_project(awards, eml_version = "2.1") +} +\arguments{ +\item{awards}{(list) A list of NSF award numbers as characters} + +\item{eml_version}{(char) EML version to use (2.1.1 or 2.2.0)} +} +\value{ +project (emld) An EML project section +} +\description{ +This function takes a list of NSF award numbers and uses it to +query the NSF API to get the award title, PIs, and coPIs. The +return value is an EML project section. The function supports 1 +or more award numbers. +} +\examples{ +awards <- c("1203146", "1203473", "1603116") + +proj <- eml_nsf_to_project(awards, eml_version = "2.1.1") + +me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + +doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Minimal Valid EML Dataset", + creator = me, + contact = me)) + +doc$dataset$project <- proj + +EML::eml_validate(doc) + +} diff --git a/man/eml_otherEntity_to_dataTable.Rd b/man/eml_otherEntity_to_dataTable.Rd new file mode 100644 index 0000000..2b363eb --- /dev/null +++ b/man/eml_otherEntity_to_dataTable.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_otherEntity_to_dataTable} +\alias{eml_otherEntity_to_dataTable} +\title{Convert otherEntities to dataTables} +\usage{ +eml_otherEntity_to_dataTable(doc, index, validate_eml = TRUE) +} +\arguments{ +\item{doc}{(list) An EML document.} + +\item{index}{(integer) The indices of the otherEntities to be transformed.} + 
+\item{validate_eml}{(logical) Optional. Whether or not to validate the EML after +completion. Setting this to \code{FALSE} reduces execution time by ~50 percent.} +} +\description{ +Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +otherEntity object as currently constructed - it does not add a physical or add attributes. +However, if these are already in their respective slots, they will be retained. +} +\examples{ +\dontrun{ +doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + +doc <- eml_otherEntity_to_dataTable(doc, 1) +} +} +\author{ +Dominic Mullen dmullen17@gmail.com +} diff --git a/man/eml_party.Rd b/man/eml_party.Rd index 2b925e2..43eac76 100644 --- a/man/eml_party.Rd +++ b/man/eml_party.Rd @@ -2,38 +2,54 @@ % Please edit documentation in R/eml.R \name{eml_party} \alias{eml_party} -\title{Low-level helper for creating EML parties} +\title{Create an EML party} \usage{ -eml_party(type = "associatedParty", given_names = NULL, sur_name = NULL, - organization = NULL, position = NULL, email = NULL, phone = NULL, - address = NULL, userId = NULL, role = NULL) +eml_party(type = "associatedParty", given_names = NULL, + sur_name = NULL, organization = NULL, position = NULL, + email = NULL, phone = NULL, address = NULL, userId = NULL, + role = NULL) } \arguments{ -\item{type}{(character) The type of party (e.g. 'contact')} +\item{type}{(character) The type of party (e.g. 
'contact').} -\item{given_names}{(character) The party's given name(s)} +\item{given_names}{(character) The party's given name(s).} -\item{sur_name}{(character) The party's surname} +\item{sur_name}{(character) The party's surname.} -\item{organization}{(character) The party's organization name} +\item{organization}{(character) The party's organization name.} -\item{position}{(character) The party's position} +\item{position}{(character) The party's position.} -\item{email}{(character) The party's email address(es)} +\item{email}{(character) The party's email address(es).} -\item{phone}{(character) The party's phone number(s)} +\item{phone}{(character) The party's phone number(s).} -\item{address}{(character) The party's address(es)} +\item{address}{(character) The party's address(es) as a valid EML address} -\item{userId}{(character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ} +\item{userId}{(character) The party's ORCID, in format https://orcid.org/WWWW-XXXX-YYYY-ZZZZ.} -\item{role}{(character) The party's role} +\item{role}{(character) The party's role.} } \value{ -An instance of the party specified by the in \code{type} argument +(party) An instance of the party specified by the \code{type} argument. } \description{ -You usually will want to use the high-level functions such as -\code{\link{eml_creator}} and \code{\link{eml_contact}} but using this is -fine. +You will usually want to use the high-level functions such as +\code{\link[=eml_creator]{eml_creator()}} and \code{\link[=eml_contact]{eml_contact()}} but using this is fine. +} +\details{ +The \code{userId} argument assumes an ORCID so be sure to adjust for that. 
+} +\examples{ +\dontrun{ +eml_party("creator", "Test", "User") +eml_party("creator", "Bryce", "Mecum", userId = "https://orcid.org/0000-0002-0381-3766") +eml_party("creator", given_names = list("Dominic", "'Dom'"), + sur_name = "Mullen", list("NCEAS", "UCSB"), + position = list("Data Scientist", "Programmer"), + address = eml$address(deliveryPoint = "735 State St", + city = "Santa Barbara", + administrativeArea = "CA", + postalCode = "85719")) +} } diff --git a/man/eml_personnel.Rd b/man/eml_personnel.Rd new file mode 100644 index 0000000..18edae0 --- /dev/null +++ b/man/eml_personnel.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_personnel} +\alias{eml_personnel} +\title{Create an EML personnel} +\usage{ +eml_personnel(role = NULL, ...) +} +\arguments{ +\item{role}{(character) Personnel role, e.g. "principalInvestigator".} + +\item{...}{Arguments passed on to \code{\link[=eml_party]{eml_party()}}.} +} +\value{ +(personnel) The new personnel. +} +\description{ +See \code{\link[=eml_party]{eml_party()}} for details. +} +\examples{ +eml_personnel("test", "user", email = "test@user.com", role = "principalInvestigator") +} diff --git a/man/eml_project.Rd b/man/eml_project.Rd index 3a7c847..9775fda 100644 --- a/man/eml_project.Rd +++ b/man/eml_project.Rd @@ -2,30 +2,44 @@ % Please edit documentation in R/eml.R \name{eml_project} \alias{eml_project} -\title{Create an eml-project section.} +\title{Create an EML project section} \usage{ -eml_project(title, awards, first, last, organizations = NULL, - role = "originator") +eml_project(title, personnelList, abstract = NULL, funding = NULL, + studyAreaDescription = NULL, designDescription = NULL, + relatedProject = NULL) } \arguments{ -\item{title}{(character) Title of the project.} +\item{title}{(character) Title of the project (Required). 
May have multiple titles +constructed using \code{list}.} -\item{awards}{(character) One or more awards for the project.} +\item{personnelList}{(list of personnel) Personnel involved with the project.} -\item{first}{(character) First name of the person with role `role`.} +\item{abstract}{(character) Project abstract. Can pass as a list +for separate paragraphs.} -\item{last}{(character) Last name of the person with role `role`.} +\item{funding}{(character) Funding sources for the project such as grant and +contract numbers. Can pass as a list for separate paragraphs.} -\item{organizations}{(character) Optional. One or more organization strings.} +\item{studyAreaDescription}{(studyAreaDescription)} -\item{role}{(character) Optional. Specify an alternate role.} +\item{designDescription}{(designDescription)} + +\item{relatedProject}{(project)} } \value{ (project) The new project section. } \description{ -Note: This is super-limited right now. +Create an EML project section. +} +\details{ +Note - studyAreaDescription, designDescription, and relatedProject are not +fully fleshed out. Need to pass these objects in directly if you want to use +them. 
} \examples{ -eml_project("Some title", "51231", "Some", "User") +proj <- eml_project(list("Some title", "A second title if needed"), + list(eml_personnel("Bryce", "Mecum", role = "principalInvestigator")), + list("Abstract paragraph 1", "Abstract paragraph 2"), + "Funding Agency: Award Number 12345") } diff --git a/man/eml_set_reference.Rd b/man/eml_set_reference.Rd new file mode 100644 index 0000000..b753c96 --- /dev/null +++ b/man/eml_set_reference.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_set_reference} +\alias{eml_set_reference} +\title{Set a reference to an EML object} +\usage{ +eml_set_reference(element_to_reference, element_to_replace) +} +\arguments{ +\item{element_to_reference}{(list) An EML element to reference.} + +\item{element_to_replace}{(list) An EML element to replace with a reference.} +} +\description{ +This function creates a new object with the same class as \code{element_to_replace} +using a reference to \code{element_to_reference}. 
+} +\examples{ +\dontrun{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) + +# Set the first contact as a reference to the first creator +doc$dataset$contact[[1]] <- eml_set_reference(doc$dataset$creator[[1]], +doc$dataset$contact[[1]]) + +# This is also useful when we want to set references to a subset of 'dataTable' +# or 'otherEntity' objects +# Add a few more objects first to illustrate the use: +doc$dataset$dataTable[[3]] <- doc$dataset$dataTable[[1]] +doc$dataset$dataTable[[4]] <- doc$dataset$dataTable[[1]] +# Add references to the second and third elements only (not the 4th): +for (i in 2:3) { + doc$dataset$dataTable[[i]]$attributeList <- eml_set_reference( + doc$dataset$dataTable[[1]]$attributeList, + doc$dataset$dataTable[[i]]$attributeList) +} +# If we print the entire 'dataTable' list we see elements 2 and 3 have +# references while 4 does not. + +doc$dataset$dataTable +} +} +\author{ +Dominic Mullen dmullen17@gmail.com +} diff --git a/man/eml_set_shared_attributes.Rd b/man/eml_set_shared_attributes.Rd new file mode 100644 index 0000000..5c9bf8d --- /dev/null +++ b/man/eml_set_shared_attributes.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_set_shared_attributes} +\alias{eml_set_shared_attributes} +\title{Set shared attribute references} +\usage{ +eml_set_shared_attributes(doc, attributeList = NULL, + type = "dataTable") +} +\arguments{ +\item{doc}{(emld) An EML object.} + +\item{attributeList}{(attributeList) Optional. An EML attributeList object. If not provided +then it will default to the attributeList of the first \code{type} element.} + +\item{type}{(character) Optional. Specifies whether to replace 'dataTable' or 'otherEntity' +attributeList objects with references. Defaults to 'dataTable'.} +} +\value{ +(doc) The modified EML document. 
+} +\description{ +This function sets shared attributes using the attributes of the first \code{type} +selected and creates references for all remaining objects of equivalent \code{type}. +} +\examples{ +\dontrun{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +doc <- EML::read_eml(dataone::getObject(adc, 'doi:10.18739/A2S17SS1M')) +atts <- EML::set_attributes( + EML::get_attributes(doc$dataset$dataTable[[1]]$attributeList)$attributes) + +doc <- eml_set_shared_attributes(doc, atts, type = 'dataTable') +} +} +\author{ +Dominic Mullen dmullen17@gmail.com +} diff --git a/man/eml_validate_attributes.Rd b/man/eml_validate_attributes.Rd index 89c8f9f..8788864 100644 --- a/man/eml_validate_attributes.Rd +++ b/man/eml_validate_attributes.Rd @@ -7,21 +7,17 @@ eml_validate_attributes(attributes) } \arguments{ -\item{attributes}{(attributeList) An attributeList} +\item{attributes}{(attributeList) An attributeList.} } \value{ -(boolean) Named vector of TRUE/FALSE indicating which attributes -are valid +(logical) Named vector indicating which attributes are valid. } \description{ The attributes passed into this function are validated one-by-one and the progress of going through each attribute is printed to the screen along -with any and all validation issues. -} -\details{ -This is done by, for each attribute in the list, creating a minimum valid -EML document and adding a new otherEntity with a new attributeList containing -the single attribute to be validated. +with any and all validation issues. This is done by, for each attribute in the list, +creating a minimum valid EML document and adding a new otherEntity with a new +attributeList containing the single attribute to be validated. 
} \examples{ \dontrun{ diff --git a/man/env_get.Rd b/man/env_get.Rd index a94169e..6cb434a 100644 --- a/man/env_get.Rd +++ b/man/env_get.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/environment.R \name{env_get} \alias{env_get} -\title{environment.R -Author: Bryce Mecum } +\title{Get the current environment name} \usage{ env_get() } @@ -11,7 +10,5 @@ env_get() (character) The environment name. } \description{ -Functions related to loading configuriation based upon the environment -the code is being run under. Get the current environment name. } diff --git a/man/env_load.Rd b/man/env_load.Rd deleted file mode 100644 index c8e9d4a..0000000 --- a/man/env_load.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/environment.R -\name{env_load} -\alias{env_load} -\title{Load environmental variables from a YAML-formatted environment file.} -\usage{ -env_load(name = NULL, path = NULL, skip_mn = FALSE) -} -\arguments{ -\item{name}{(character) Optional. The environment name.} - -\item{path}{(character) Optional. Path to an environment file.} - -\item{skip_mn}{(logical) Optional. Skip contacting the MNode and filling in the $mn element of the environment.} -} -\value{ -(list) A list of name-value pairs. 
-} -\description{ -This file should be formatted in the following way: -} -\details{ -some_environment: - var_one: some value - var_two: some value - var_three: some value -} diff --git a/man/extract_local_identifier.Rd b/man/extract_local_identifier.Rd deleted file mode 100644 index e4e3748..0000000 --- a/man/extract_local_identifier.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{extract_local_identifier} -\alias{extract_local_identifier} -\title{util.R -Author: Bryce Mecum } -\usage{ -extract_local_identifier(type, file) -} -\arguments{ -\item{type}{(character) A string, one of "gateway" or "field-projects".} - -\item{file}{(character) A string, a connection, or raw vector (same as xml2::read_xml).} -} -\description{ -General utility functions that may be later merged into other files. -Extracts the local identifier for an ACADIS ISO metadata XML file. -} diff --git a/man/filter_obsolete_pids.Rd b/man/filter_obsolete_pids.Rd deleted file mode 100644 index 8d22bc8..0000000 --- a/man/filter_obsolete_pids.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{filter_obsolete_pids} -\alias{filter_obsolete_pids} -\title{Filters PIDs that are obsolete.} -\usage{ -filter_obsolete_pids(node, pids) -} -\arguments{ -\item{node}{(MNode|CNode) The Node to query.} - -\item{pids}{(character) PIDs to check the obsoletion state of.} -} -\value{ -(character) PIDs that are not obsoleted by another PID. -} -\description{ -Whether or not a PID is obsolete is determined by whether its "obsoletedBy" -property is set to another PID (TRUE) or is NA (FALSE). 
-} diff --git a/man/filter_packaging_statements.Rd b/man/filter_packaging_statements.Rd deleted file mode 100644 index 86229b9..0000000 --- a/man/filter_packaging_statements.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{filter_packaging_statements} -\alias{filter_packaging_statements} -\title{Filter statements related to packaging} -\usage{ -filter_packaging_statements(statements) -} -\arguments{ -\item{statements}{(data.frame) A set of Statements to be filtered} -} -\value{ -(data.frame) The filtered Statements -} -\description{ -This function was written specifically for the case of updating a resource -map while preserving any extra statements that have been added such as PROV -statements. Statements are filtered according to these rules: -} -\details{ -1. If the subject or object is the ore:ResourceMap resource -2. If the subject or object is the ore:Aggregation resource -3. If the predicate is cito:documents or cito:isDocumentedBy -4. Once filters 1-3 have been executed, any remaining triples are considered - for removal if they look like dangling dc:identifier statements - -The consequence of filter 4 is that dc:identifier statements are left in if -they are still in use by another statement -} diff --git a/man/find_newest_object.Rd b/man/find_newest_object.Rd index 0812cdb..2d02ceb 100644 --- a/man/find_newest_object.Rd +++ b/man/find_newest_object.Rd @@ -2,23 +2,23 @@ % Please edit documentation in R/util.R \name{find_newest_object} \alias{find_newest_object} -\title{Find the newest (by dateUploaded) object within a given set of objects.} +\title{Find the newest object within the given set of objects} \usage{ find_newest_object(node, identifiers, rows = 1000) } \arguments{ -\item{node}{(MNode | CNode) The node to query} +\item{node}{(MNode/CNode) The Member Node to query.} -\item{rows}{(numeric) Optional. 
Specify the size of the query result set.} +\item{identifiers}{(character) One or more identifiers.} -\item{identiifers}{(character) One or more identifiers} +\item{rows}{(numeric) Optional. Specify the size of the query result set.} } \value{ (character) The PID of the newest object. In the case of a tie (very unlikely) the first element, in natural order, is returned. } \description{ -Find the newest (by dateUploaded) object within a given set of objects. +Find the newest object, based on dateUploaded, within the given set of objects. } \examples{ \dontrun{ diff --git a/man/find_newest_resource_map.Rd b/man/find_newest_resource_map.Rd deleted file mode 100644 index d96609e..0000000 --- a/man/find_newest_resource_map.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{find_newest_resource_map} -\alias{find_newest_resource_map} -\title{Get the resource map(s) for the given object.} -\usage{ -find_newest_resource_map(node, pid, rows = 1000) -} -\arguments{ -\item{node}{(MNode|CNode) The Node to query.} - -\item{pid}{(character) The object to get the resource map(s) for.} - -\item{rows}{(numeric) Optional. The number of query results to return. This -shouldn't need to be modified and the default, 1000, is very likely to be -more than enough.} -} -\value{ -(character) The resource map(s) that contain `pid`. -} -\description{ -Get the resource map(s) for the given object. 
-} diff --git a/man/fix_bad_enum.Rd b/man/fix_bad_enum.Rd deleted file mode 100644 index bebd9ee..0000000 --- a/man/fix_bad_enum.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modify_metadata.R -\name{fix_bad_enum} -\alias{fix_bad_enum} -\title{Fix a metadata record with a bad topicCategory.} -\usage{ -fix_bad_enum(path) -} -\arguments{ -\item{path}{} -} -\value{ - -} -\description{ -This is the case where the ISO schema says what's inside a -gmd:MD_TopicCategoryCode element should match items from a controlled -vocabulary. But in the ISO metadata we have, there are newlines and spaces -around that text which causes a check for string equality to fail. i.e. -} -\details{ -'oceans' != ' oceans ' -} diff --git a/man/fix_bad_topic.Rd b/man/fix_bad_topic.Rd deleted file mode 100644 index 82fded5..0000000 --- a/man/fix_bad_topic.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modify_metadata.R -\name{fix_bad_topic} -\alias{fix_bad_topic} -\title{Fix a metadata record with multiple MD_TopicCategory children elements -inside a single topicCategory element.} -\usage{ -fix_bad_topic(path) -} -\arguments{ -\item{path}{} -} -\value{ - -} -\description{ -Example: -} -\details{ - - imageryBaseMapsEarthCover - oceans - -} diff --git a/man/format_eml.Rd b/man/format_eml.Rd index 95dff20..968fef5 100644 --- a/man/format_eml.Rd +++ b/man/format_eml.Rd @@ -1,23 +1,25 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dataone_formats.R +% Please edit documentation in R/formats.R \name{format_eml} \alias{format_eml} -\title{Helper function to generate the EML 2.1.1 format ID.} +\title{Generate the EML 2.1.1 format ID} \usage{ -format_eml() +format_eml(version) +} +\arguments{ +\item{version}{The version of EML ('2.1.1' or '2.2.0')} } \value{ (character) The format ID for EML 2.1.1. 
} \description{ -Helper function to generate the EML 2.1.1 format ID. +Returns the EML 2.1.1 format ID. } \examples{ -format_eml - +format_eml("2.1.1") \dontrun{ # Upload a local EML 2.1.1 file: env <- env_load() -publish_object(env$mn, "path_to_some_EML_file", format_eml()) +publish_object(env$mn, "path_to_some_EML_file", format_eml("2.1")) } } diff --git a/man/format_iso.Rd b/man/format_iso.Rd index ebe61b6..19443d7 100644 --- a/man/format_iso.Rd +++ b/man/format_iso.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dataone_formats.R +% Please edit documentation in R/formats.R \name{format_iso} \alias{format_iso} -\title{dataone_formats.R} +\title{Generate the ISO 19139 format ID} \usage{ format_iso() } @@ -10,12 +10,7 @@ format_iso() (character) The format ID for ISO 19139. } \description{ -A set of thin functions which return the DataONE format ID string. These are -to aid in filling in function arguments and can't remember or don't want to -type in the full format ID. By putting these format ID strings into -functions, a user's autocompletion routine in their editor can help them -fill in the format ID they want. -Helper function to generate the ISO 19139 format ID.w +Returns the ISO 19139 format ID. } \examples{ format_iso() diff --git a/man/generate_resource_map.Rd b/man/generate_resource_map.Rd index fe738d9..266c32d 100644 --- a/man/generate_resource_map.Rd +++ b/man/generate_resource_map.Rd @@ -2,33 +2,32 @@ % Please edit documentation in R/packaging.R \name{generate_resource_map} \alias{generate_resource_map} -\title{Create a resource map RDF/XML file and save is to a temporary path. 
-This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`.} +\title{Create a resource map RDF/XML file and save it to a temporary path} \usage{ -generate_resource_map(metadata_pid, data_pids = NULL, child_pids = NULL, - other_statements = NULL, +generate_resource_map(metadata_pid, data_pids = NULL, + child_pids = NULL, other_statements = NULL, resolve_base = "https://cn.dataone.org/cn/v2/resolve", resource_map_pid = NULL) } \arguments{ -\item{metadata_pid}{(character) PID of the metadata Object.} +\item{metadata_pid}{(character) PID of the metadata object.} -\item{data_pids}{(character) PID(s) of the data Objects.} +\item{data_pids}{(character) PID(s) of the data objects.} -\item{child_pids}{(character) Optional. PID(s) of child Resource Maps.} +\item{child_pids}{(character) Optional. PID(s) of child resource maps.} -\item{other_statements}{(data.frame) Extra statements to add to the Resource Map.} +\item{other_statements}{(data.frame) Extra statements to add to the resource map.} \item{resolve_base}{(character) Optional. The resolve service base URL.} + +\item{resource_map_pid}{(character) The PID of a resource map.} } \value{ -Absolute path to the Resource Map on disk (character) +(character) Absolute path to the resource map on disk. } \description{ -Create a resource map RDF/XML file and save is to a temporary path. -This is a convenience wrapper around the constructor of the `ResourceMap` -class from `DataPackage`. +This is a convenience wrapper around the constructor of the \code{ResourceMap} +class from \code{DataPackage}. 
} \examples{ \dontrun{ diff --git a/man/generate_resource_map_pid.Rd b/man/generate_resource_map_pid.Rd deleted file mode 100644 index 1ba3679..0000000 --- a/man/generate_resource_map_pid.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{generate_resource_map_pid} -\alias{generate_resource_map_pid} -\title{Generate a PID for a new resource map by appending "resource_map_" to it.} -\usage{ -generate_resource_map_pid(metadata_pid) -} -\arguments{ -\item{metadata_pid}{} -} -\description{ -Generate a PID for a new resource map by appending "resource_map_" to it. -} diff --git a/man/get_all_sysmeta.Rd b/man/get_all_sysmeta.Rd new file mode 100644 index 0000000..4eee884 --- /dev/null +++ b/man/get_all_sysmeta.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sysmeta.R +\name{get_all_sysmeta} +\alias{get_all_sysmeta} +\title{Get system metadata for all elements of a data package} +\usage{ +get_all_sysmeta(mn, resource_map_pid, nmax = 1000, + child_packages = FALSE) +} +\arguments{ +\item{mn}{(MNode) The Member Node to query.} + +\item{resource_map_pid}{(character) The PID for a resource map.} + +\item{nmax}{(numeric) The maximum number of system metadata objects to return.} + +\item{child_packages}{(logical) If parent package, whether or not to include child packages.} +} +\value{ +(list) A list of system metadata objects. +} +\description{ +This function retrieves the system metadata for all elements of a data package and returns them as a list. +It is useful for inspecting system metadata for an entire data package and identifying changes where needed. +} +\examples{ +\dontrun{ +cn_staging <- CNode("STAGING") +adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC") + +rm_pid <- "resource_map_urn:uuid:..." 
+ +all <- get_all_sysmeta(adc_test, rm_pid) + +# View in viewer to inspect +View(all) + +# Print specific elements to console +all[[1]]@rightsHolder + +# Create separate object +sysmeta_md <- all[[2]] +} +} diff --git a/man/get_all_versions.Rd b/man/get_all_versions.Rd index e3e5df5..87b4a75 100644 --- a/man/get_all_versions.Rd +++ b/man/get_all_versions.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/util.R \name{get_all_versions} \alias{get_all_versions} -\title{Get the PIDs of all versions of an object.} +\title{Get the PIDs of all versions of an object} \usage{ get_all_versions(node, pid) } \arguments{ -\item{node}{(MNode|CNode) The node to query.} +\item{node}{(MNode) The Member Node to query.} \item{pid}{(character) Any object in the chain.} } @@ -17,3 +17,12 @@ get_all_versions(node, pid) \description{ Get the PIDs of all versions of an object. } +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" + +ids <- get_all_versions(mn, pid) +} +} diff --git a/man/get_current_version.Rd b/man/get_current_version.Rd deleted file mode 100644 index 39e3849..0000000 --- a/man/get_current_version.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_current_version} -\alias{get_current_version} -\title{Get the current package version.} -\usage{ -get_current_version() -} -\value{ -(character) The current package version. -} -\description{ -This function parses the installed DESCRIPTION file to get the latest -version. 
-} diff --git a/man/get_doc_id.Rd b/man/get_doc_id.Rd deleted file mode 100644 index d93bc9f..0000000 --- a/man/get_doc_id.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{get_doc_id} -\alias{get_doc_id} -\title{Get the Metacat docid for the given identifier} -\usage{ -get_doc_id(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The sysmeta of the object you want to find.} -} -\value{ -(character) The docid -} -\description{ -Get the Metacat docid for the given identifier -} diff --git a/man/get_identifier.Rd b/man/get_identifier.Rd deleted file mode 100644 index cadcff4..0000000 --- a/man/get_identifier.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_identifier} -\alias{get_identifier} -\title{Get the identifier from a DataONE response.} -\usage{ -get_identifier(dataone_response) -} -\arguments{ -\item{dataone_response}{("XMLInternalDocument" "XMLAbstractDocument")} -} -\value{ -(character) The PID. -} -\description{ -Example resposne: -} -\details{ - - urn:uuid:12aaf494-5840-434d-9cdb-c2597d58543e - -} diff --git a/man/get_latest_release.Rd b/man/get_latest_release.Rd deleted file mode 100644 index ec17dfb..0000000 --- a/man/get_latest_release.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_latest_release} -\alias{get_latest_release} -\title{Use the GitHub API to find the latest release for the package.} -\usage{ -get_latest_release() -} -\value{ -(character) The latest release. -} -\description{ -Use the GitHub API to find the latest release for the package. 
-} diff --git a/man/get_mn_base_url.Rd b/man/get_mn_base_url.Rd index a635f3d..46dd487 100644 --- a/man/get_mn_base_url.Rd +++ b/man/get_mn_base_url.Rd @@ -2,13 +2,22 @@ % Please edit documentation in R/dataone.R \name{get_mn_base_url} \alias{get_mn_base_url} -\title{Get the base URL of the Member Node.} +\title{Get base URL of a Member Node} \usage{ get_mn_base_url(mn) } \arguments{ -\item{mn}{} +\item{mn}{(character) The Member Node.} +} +\value{ +(character) The URL. } \description{ -Get the base URL of the Member Node. +Get the base URL of a Member Node. +} +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +} } diff --git a/man/get_ncdf4_attributes.Rd b/man/get_ncdf4_attributes.Rd index cfbf874..87370e3 100644 --- a/man/get_ncdf4_attributes.Rd +++ b/man/get_ncdf4_attributes.Rd @@ -7,13 +7,13 @@ get_ncdf4_attributes(nc) } \arguments{ -\item{nc}{(ncdf4 or character) Either a ncdf4 object or a file path} +\item{nc}{(ncdf4/character) Either a ncdf4 object or a file path.} } \value{ -(data.frame) A data.frame of the attributes +(data.frame) A data.frame of the attributes. } \description{ -Get a data.frame of attributes from a NetCDF object +Get a data.frame of attributes from a NetCDF object. } \examples{ \dontrun{ diff --git a/man/get_netcdf_format_id.Rd b/man/get_netcdf_format_id.Rd deleted file mode 100644 index 9a51c68..0000000 --- a/man/get_netcdf_format_id.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_netcdf_format_id} -\alias{get_netcdf_format_id} -\title{Determine the DataONE format ID for the NetCDF file provided by path.} -\usage{ -get_netcdf_format_id(path) -} -\arguments{ -\item{path}{(character) Full or relative path to the file in question.} -} -\value{ -(character) The DataONE format ID. -} -\description{ -Determine the DataONE format ID for the NetCDF file provided by path. 
-} diff --git a/man/get_or_create_pid.Rd b/man/get_or_create_pid.Rd deleted file mode 100644 index 9836714..0000000 --- a/man/get_or_create_pid.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{get_or_create_pid} -\alias{get_or_create_pid} -\title{Get the already-minted PID from the inventory or mint a new one.} -\usage{ -get_or_create_pid(file, mn, scheme = "UUID") -} -\arguments{ -\item{file}{(data.frame) A single row from the inventory.} - -\item{mn}{(MNode) The Member Node that will mint the new PID, if needed.} - -\item{scheme}{(character) The identifier scheme to use.} -} -\value{ -The identifier (character) -} -\description{ -Get the already-minted PID from the inventory or mint a new one. -} diff --git a/man/get_orcid_email.Rd b/man/get_orcid_email.Rd new file mode 100644 index 0000000..8cf7ae9 --- /dev/null +++ b/man/get_orcid_email.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{get_orcid_email} +\alias{get_orcid_email} +\title{Retrieve an email address from an ORCID URL} +\usage{ +get_orcid_email(orcid_url) +} +\arguments{ +\item{orcid_url}{(character) A valid ORCID URL address.} +} +\value{ +(character) Public e-mail addresses. +} +\description{ +Retrieve public email addresses from an ORCID URL. +} +\examples{ +\dontrun{ +pi_email <- get_orcid_email('https://orcid.org/0000-0002-2561-5840') +} +} diff --git a/man/get_orcid_name.Rd b/man/get_orcid_name.Rd new file mode 100644 index 0000000..bfe8832 --- /dev/null +++ b/man/get_orcid_name.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{get_orcid_name} +\alias{get_orcid_name} +\title{Retrieve a name from an ORCID URL} +\usage{ +get_orcid_name(orcid_url) +} +\arguments{ +\item{orcid_url}{(character) A valid ORCID URL address.} +} +\value{ +(character) First and last name. 
+} +\description{ +Retrieve first and last name from an ORCID URL. +} +\examples{ +\dontrun{ +pi_name <- get_orcid_name('https://orcid.org/0000-0002-2561-5840') +} +} diff --git a/man/get_package.Rd b/man/get_package.Rd index 7f7066d..537d3dc 100644 --- a/man/get_package.Rd +++ b/man/get_package.Rd @@ -2,24 +2,34 @@ % Please edit documentation in R/util.R \name{get_package} \alias{get_package} -\title{Get a structured list of PIDs for the objects in a package.} +\title{Get a structured list of PIDs for the objects in a package} \usage{ -get_package(node, pid, file_names = FALSE, rows = 1000) +get_package(node, pid, file_names = FALSE, rows = 5000) } \arguments{ -\item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} +\item{node}{(MNode/CNode) The Coordinating/Member Node to run the query on.} -\item{pid}{(character) The the metadata PID of the package.} +\item{pid}{(character) The resource map PID of the package.} \item{file_names}{(logical) Whether to return file names for all objects.} \item{rows}{(numeric) The number of rows to return in the query. This is only -useful to set if you are warned about the result set being truncated.} +useful to set if you are warned about the result set being truncated. Defaults to 5000.} } \value{ (list) A structured list of the members of the package. } \description{ -This is a wrapper function around `get_package_direct` which takes either -a resource map PID or a metadata PID as its `pid` argument. +Get a structured list of PIDs for the objects in a package, +including the resource map, metadata, and data objects. 
+} +\examples{ +\dontrun{ +#Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- "resource_map_urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" + +ids <- get_package(mn, pid) +} } diff --git a/man/get_package_direct.Rd b/man/get_package_direct.Rd deleted file mode 100644 index f4790f6..0000000 --- a/man/get_package_direct.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_package_direct} -\alias{get_package_direct} -\title{Get a structured list of PIDs for the objects in a package.} -\usage{ -get_package_direct(node, pid, file_names = FALSE, rows = 1000) -} -\arguments{ -\item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} - -\item{pid}{(character) The the metadata PID of the package.} - -\item{file_names}{(logical) Whether to return file names for all objects.} - -\item{rows}{(numeric) The number of rows to return in the query. This is only -useful to set if you are warned about the result set being truncated.} -} -\value{ - -} -\description{ -Get a structured list of PIDs for the objects in a package. -} diff --git a/man/get_token.Rd b/man/get_token.Rd index 3be7382..b48ca1d 100644 --- a/man/get_token.Rd +++ b/man/get_token.Rd @@ -2,16 +2,23 @@ % Please edit documentation in R/dataone.R \name{get_token} \alias{get_token} -\title{Gets the currently set authentication token.} +\title{Get the currently set authentication token} \usage{ get_token(node) } \arguments{ -\item{node}{(MNode|CNode) The CN or MN you want to find a token for.} +\item{node}{(MNode/CNode) The Member/Coordinating Node to query.} } \value{ (character) The token. } \description{ -Gets the currently set authentication token. +Get the currently set authentication token. 
+} +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +get_token(mn) +} } diff --git a/man/get_token_subject.Rd b/man/get_token_subject.Rd deleted file mode 100644 index 47ebf56..0000000 --- a/man/get_token_subject.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{get_token_subject} -\alias{get_token_subject} -\title{Returns the subject of the set dataone_test_token} -\usage{ -get_token_subject() -} -\value{ -(character) The token subject. -} -\description{ -Returns the subject of the set dataone_test_token -} diff --git a/man/guess_format_id.Rd b/man/guess_format_id.Rd index 6493807..c637862 100644 --- a/man/guess_format_id.Rd +++ b/man/guess_format_id.Rd @@ -1,17 +1,20 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R +% Please edit documentation in R/formats.R \name{guess_format_id} \alias{guess_format_id} -\title{Guess format from filename for a vector of filenames.} +\title{Guess format from filename} \usage{ guess_format_id(filenames) } \arguments{ -\item{filenames}{(character)} +\item{filenames}{(character) A vector of filenames.} } \value{ -(character) DataOne format identifiers strings. +(character) DataONE format IDs. } \description{ Guess format from filename for a vector of filenames. } +\examples{ +formatid <- guess_format_id("temperature_data.csv") +} diff --git a/man/insert_file.Rd b/man/insert_file.Rd deleted file mode 100644 index f2f7f2a..0000000 --- a/man/insert_file.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{insert_file} -\alias{insert_file} -\title{package.R -Author: Bryce Mecum } -\usage{ -insert_file(inventory, file, env = NULL) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} - -\item{file}{(character) The fully-qualified relative path to the file. 
See examples.} - -\item{env}{(list) Optional. Specify an environment.} -} -\description{ -Code related to inserting datasets as Data Packages. -Insert a file from a single row of the Inventory. -} diff --git a/man/insert_package.Rd b/man/insert_package.Rd deleted file mode 100644 index 5c9ec1d..0000000 --- a/man/insert_package.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{insert_package} -\alias{insert_package} -\title{Create a single package Data Package from files in the Inventory.} -\usage{ -insert_package(inventory, package, env = NULL) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} - -\item{package}{(character) The package identifier.} - -\item{env}{(list) Environment variables.} - -\item{child_pids}{(character) Resource Map PIDs for child Data Packages.} -} -\value{ -A list containing PIDs and whether objects were inserted. (list) -} -\description{ -Create a single package Data Package from files in the Inventory. -} diff --git a/man/inv_add_extra_columns.Rd b/man/inv_add_extra_columns.Rd deleted file mode 100644 index 1caf78b..0000000 --- a/man/inv_add_extra_columns.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_add_extra_columns} -\alias{inv_add_extra_columns} -\title{Adds a set of extra columsn to the inventory that are useful for working -with them.} -\usage{ -inv_add_extra_columns(inventory) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} -} -\value{ -An inventory (data.frame) -} -\description{ -Adds a set of extra columsn to the inventory that are useful for working -with them. 
-} diff --git a/man/inv_add_parent_package_column.Rd b/man/inv_add_parent_package_column.Rd deleted file mode 100644 index 83ec12c..0000000 --- a/man/inv_add_parent_package_column.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_add_parent_package_column} -\alias{inv_add_parent_package_column} -\title{Add a column for parent packages.} -\usage{ -inv_add_parent_package_column(inventory) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} -} -\value{ -inventory (data.frame) An Inventory. -} -\description{ -Add a column for parent packages. -} diff --git a/man/inv_init.Rd b/man/inv_init.Rd deleted file mode 100644 index 17450b9..0000000 --- a/man/inv_init.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_init} -\alias{inv_init} -\title{inventory.R -Author: Bryce Mecum } -\usage{ -inv_init() -} -\value{ -An empty data frame -} -\description{ -Functions relating to keeping up an inventory of files that exist on the KNB -and may or may not be copied to another computer and untarred. -} -\details{ -Create an empty inventory data.frame. This doesn't need to be a function -but I'm making it one in case the initialization routine becomes more -complicated. -} diff --git a/man/inv_load_checksums.Rd b/man/inv_load_checksums.Rd deleted file mode 100644 index f88a278..0000000 --- a/man/inv_load_checksums.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_checksums} -\alias{inv_load_checksums} -\title{Load checksums into the inventory file from a text file. 
This function -removes the column 'checksum_sha256' from inventory before doing a -left join.} -\usage{ -inv_load_checksums(inventory, path) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} - -\item{path}{(character) Path to a file containing sizes.} -} -\value{ -An inventory (data.frame) -} -\description{ -Load checksums into the inventory file from a text file. This function -removes the column 'checksum_sha256' from inventory before doing a -left join. -} diff --git a/man/inv_load_dois.Rd b/man/inv_load_dois.Rd deleted file mode 100644 index c16bece..0000000 --- a/man/inv_load_dois.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_dois} -\alias{inv_load_dois} -\title{Load DOIs from a text file into the Inventory.} -\usage{ -inv_load_dois(inventory, path) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} - -\item{path}{Location of a text file with DOIs and file paths. (character)} -} -\value{ -(data.frame) The modified Inventory. -} -\description{ -Load DOIs from a text file into the Inventory. -} diff --git a/man/inv_load_files.Rd b/man/inv_load_files.Rd deleted file mode 100644 index 7a53821..0000000 --- a/man/inv_load_files.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_files} -\alias{inv_load_files} -\title{Load files into the inventory from a text file.} -\usage{ -inv_load_files(inventory, path, filter = TRUE) -} -\arguments{ -\item{inventory}{(character) A \code{data.frame}.} - -\item{path}{(character) Path to a file containing a file listing.} - -\item{filter}{(logical) Filter out versioned datasets. Default is TRUE.} -} -\value{ -An inventory (data.frame) -} -\description{ -Files should be the output of the command: -} -\details{ -you@server:/path/to/acadis$ find . 
-type f -} diff --git a/man/inv_load_identifiers.Rd b/man/inv_load_identifiers.Rd deleted file mode 100644 index e1bab1f..0000000 --- a/man/inv_load_identifiers.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_identifiers} -\alias{inv_load_identifiers} -\title{Load identifiers into the inventory file(s) from a text file. This function -removes the column 'identifier' from inventory before doing a -left join.} -\usage{ -inv_load_identifiers(inventory, paths) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} - -\item{path}{(character) Path(s) to files containing identifiers.} -} -\value{ -(data.frame) An inventory. -} -\description{ -Load identifiers into the inventory file(s) from a text file. This function -removes the column 'identifier' from inventory before doing a -left join. -} diff --git a/man/inv_load_sizes.Rd b/man/inv_load_sizes.Rd deleted file mode 100644 index 703f906..0000000 --- a/man/inv_load_sizes.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_load_sizes} -\alias{inv_load_sizes} -\title{Load file sizes into an inventory from a text file. Removes the column -'size_bytes' from inventory before doing a left join.} -\usage{ -inv_load_sizes(inventory, path) -} -\arguments{ -\item{path}{(character) Path to a file containing sizes.} - -\item{(data.frame)}{inventory A \code{data.frame}.} -} -\value{ -(data.frame) An inventory -} -\description{ -Load file sizes into an inventory from a text file. Removes the column -'size_bytes' from inventory before doing a left join. 
-} diff --git a/man/inv_update.Rd b/man/inv_update.Rd deleted file mode 100644 index c98c210..0000000 --- a/man/inv_update.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inventory.R -\name{inv_update} -\alias{inv_update} -\title{Update an Inventory with a new Inventory.} -\usage{ -inv_update(inventory, new_state) -} -\arguments{ -\item{inventory}{(data.frame) The old Inventory.} - -\item{new_state}{(data.frame) The new Inventory.} -} -\description{ -Update an Inventory with a new Inventory. -} diff --git a/man/is_authorized.Rd b/man/is_authorized.Rd index d290d4c..f502cd1 100644 --- a/man/is_authorized.Rd +++ b/man/is_authorized.Rd @@ -2,17 +2,29 @@ % Please edit documentation in R/dataone.R \name{is_authorized} \alias{is_authorized} -\title{Check if the user has authorization to perform an action on an object.} +\title{Check if user has authorization to perform an action on an object} \usage{ is_authorized(node, ids, action) } \arguments{ -\item{node}{(MNode|CNode) The Node to query.} +\item{node}{(MNode/CNode) The Member/Coordinating Node to query.} \item{ids}{(character) The PID or SID to check.} \item{action}{(character) One of read, write, or changePermission.} } +\value{ +(logical) +} \description{ Check if the user has authorization to perform an action on an object. 
} +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +is_authorized(mn, pids, "write") +} +} diff --git a/man/is_format_id.Rd b/man/is_format_id.Rd deleted file mode 100644 index c74e975..0000000 --- a/man/is_format_id.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{is_format_id} -\alias{is_format_id} -\title{Test whether an object is a particular format ID.} -\usage{ -is_format_id(node, pids, format_id) -} -\arguments{ -\item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} - -\item{pids}{(character)} - -\item{format_id}{(character)} -} -\value{ -(logical) -} -\description{ -Test whether an object is a particular format ID. -} diff --git a/man/is_obsolete.Rd b/man/is_obsolete.Rd index 9e5bcbb..ae55144 100644 --- a/man/is_obsolete.Rd +++ b/man/is_obsolete.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/util.R \name{is_obsolete} \alias{is_obsolete} -\title{Test whether the object is obsoleted by another object.} +\title{Test whether the object is obsoleted by another object} \usage{ is_obsolete(node, pids) } @@ -15,5 +15,15 @@ is_obsolete(node, pids) (logical) Whether or not the object is obsoleted by another object. } \description{ -Test whether the object is obsoleted by another object. 
+Test whether the object is obsoleted by another object +} +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- "urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1" + +is_obsolete(mn, pid) +} } diff --git a/man/is_public_read.Rd b/man/is_public_read.Rd new file mode 100644 index 0000000..0f3851e --- /dev/null +++ b/man/is_public_read.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/access.R +\name{is_public_read} +\alias{is_public_read} +\title{Check whether an object has public read access} +\usage{ +is_public_read(mn, pids, use.names = TRUE) +} +\arguments{ +\item{mn}{(MNode) The Member Node.} + +\item{pids}{(character) The PIDs of the objects to check for public read access.} + +\item{use.names}{(logical) If \code{TRUE}, PIDs will +be used as names for the result unless PIDs have names already, in which case +those names will be used for the result.} +} +\value{ +(logical) Whether an object has public read access. +} +\description{ +Check whether objects have public read access. +No token needs to be set to use this function. 
+} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +is_public_read(mn, pids) +} +} diff --git a/man/is_resource_map.Rd b/man/is_resource_map.Rd deleted file mode 100644 index 8be6761..0000000 --- a/man/is_resource_map.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{is_resource_map} -\alias{is_resource_map} -\title{Determines whether the object with the given PID is a resource map.} -\usage{ -is_resource_map(node, pids) -} -\arguments{ -\item{node}{(MNode|CNode) The Coordinating/Member Node to run the query on.} - -\item{pids}{(character) Vector of PIDs} -} -\value{ -(logical) Whether or not the object(s) are resource maps -} -\description{ -Determines whether the object with the given PID is a resource map. -} diff --git a/man/is_token_expired.Rd b/man/is_token_expired.Rd index dfa9d4b..7df7e0e 100644 --- a/man/is_token_expired.Rd +++ b/man/is_token_expired.Rd @@ -2,10 +2,23 @@ % Please edit documentation in R/dataone.R \name{is_token_expired} \alias{is_token_expired} -\title{Determine whether the set token is expired.} +\title{Determine whether token is expired} \usage{ is_token_expired(node) } +\arguments{ +\item{node}{(character) The Member Node.} +} +\value{ +(logical) +} \description{ Determine whether the set token is expired. 
} +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +is_token_expired(mn) +} +} diff --git a/man/is_token_set.Rd b/man/is_token_set.Rd index 8f1c739..022ec60 100644 --- a/man/is_token_set.Rd +++ b/man/is_token_set.Rd @@ -2,17 +2,23 @@ % Please edit documentation in R/dataone.R \name{is_token_set} \alias{is_token_set} -\title{dataone.R} +\title{Test whether a token is set} \usage{ is_token_set(node) } \arguments{ -\item{node}{(MNode|CNode) The CN or MN you want to find a token for.} +\item{node}{(MNode/CNode) The Member/Coordinating Node to query.} } \value{ -(boolean) +(logical) } \description{ -Helpers for the DataONE R package. Test whether a token is set. } +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") +is_token_set(mn) +} +} diff --git a/man/list_submissions.Rd b/man/list_submissions.Rd new file mode 100644 index 0000000..9b48388 --- /dev/null +++ b/man/list_submissions.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{list_submissions} +\alias{list_submissions} +\title{List recent submissions to a DataOne Member Node} +\usage{ +list_submissions(mn, from = Sys.Date(), to = Sys.Date(), + formatType = "*", use_whitelist = T) +} +\arguments{ +\item{mn}{(MNode) A DataOne Member Node} + +\item{from}{(character) the date at which the query begins in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}} + +\item{to}{(character) the date at which the query ends in 'YYYY/MM/DD' format. Defaults to \code{Sys.Date()}} + +\item{formatType}{(character) the format of objects to query. 
Must be one of: RESOURCE, METADATA, DATA, or *.} + +\item{use_whitelist}{(logical) Whether to filter out ADC admins, as listed at: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org} +} +\description{ +List recent submissions to a DataOne Member Node from all submitters not present +in the administrator whitelist: https://cn.dataone.org/cn/v2/accounts/CN=arctic-data-admins,DC=dataone,DC=org +} +\examples{ +\dontrun{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') + +View(arcticdatautils::list_submissions(adc, '2018-10-01', '2018-10-07')) + +# Return all submitted objects in the past month for the 'adc' node: +library(lubridate) +View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*')) + +# Return all submitted objects except for one user +library(lubridate) +View(list_submissions(adc, Sys.Date() \%m+\% months(-1), Sys.Date(), '*'), + whitelist = 'http://orcid.org/0000-0002-2561-5840') + +} +} +\author{ +Dominic Mullen dmullen17@gmail.com +} diff --git a/man/log_message.Rd b/man/log_message.Rd deleted file mode 100644 index aac3eee..0000000 --- a/man/log_message.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{log_message} -\alias{log_message} -\title{Log a message to the console and to a logfile.} -\usage{ -log_message(message = NULL) -} -\arguments{ -\item{message}{(character) Your log message.} -} -\value{ -Nothing. -} -\description{ -Reads from the environment variable 'LOG_PATH' and uses the value set there -to decide the location of the log file. If that envvar isn't set, it defaults -to 'arcticdata-log.txt'. 
-} diff --git a/man/mdq_run.Rd b/man/mdq_run.Rd index 8b0f253..7c06493 100644 --- a/man/mdq_run.Rd +++ b/man/mdq_run.Rd @@ -2,18 +2,29 @@ % Please edit documentation in R/quality.R \name{mdq_run} \alias{mdq_run} -\title{Score a metadata document against a MetaDIG Suite} +\title{Score a metadata document against a MetaDIG suite} \usage{ mdq_run(document, suite_id = "arctic.data.center.suite.1") } \arguments{ -\item{document}{(eml or character) Either an EML object or path to a file on disk.} +\item{document}{(eml/character) Either an EML object or path to a file on disk.} -\item{suite_id}{(character) Optional. Specificy a suite ID. Should be one of https://quality.nceas.ucsb.edu/quality/suites} +\item{suite_id}{(character) Specify a suite ID. Should be one of \url{https://quality.nceas.ucsb.edu/quality/suites}.} } \value{ -(data.frame) A sorted table of Check results +(data.frame) A sorted data.frame of check results. } \description{ -Score a metadata document against a MetaDIG Suite +This function scores a metadata document against a MetaDIG suite. +The default suite is for the Arctic Data Center. +} +\examples{ +\dontrun{ +# Check an EML document you are authoring +library(EML) +mdq_run(new("eml")) + +# Check an EML document that is saved to disk +mdq_run(system.file("examples", "example-eml-2.1.1.xml", package = "EML")) +} } diff --git a/man/new_uuid.Rd b/man/new_uuid.Rd index 58573e6..608486e 100644 --- a/man/new_uuid.Rd +++ b/man/new_uuid.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/util.R \name{new_uuid} \alias{new_uuid} -\title{Helper function to generate a new UUID PID.} +\title{Generate a new UUID PID} \usage{ new_uuid() } @@ -10,5 +10,8 @@ new_uuid() (character) A new UUID PID. } \description{ -Helper function to generate a new UUID PID. +Generate a new UUID PID. 
+} +\examples{ +id <- new_uuid() } diff --git a/man/object_exists.Rd b/man/object_exists.Rd index 901cc81..805db74 100644 --- a/man/object_exists.Rd +++ b/man/object_exists.Rd @@ -2,19 +2,30 @@ % Please edit documentation in R/util.R \name{object_exists} \alias{object_exists} -\title{Check if an object exists on a Member Node.} +\title{Check if an object exists on a Member Node} \usage{ object_exists(node, pids) } \arguments{ -\item{node}{(MNode|CNode) The Node to query.} +\item{node}{(MNode) The Member Node to query.} -\item{pid}{(character) PID to check the existence of.} +\item{pids}{(character) The PID(s) to check the existence of.} } \value{ (logical) Whether the object exists. } \description{ This is a simple check for the HTTP status of a /meta/{PID} call on the -provided member node. +provided Member Node. +} +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") + +object_exists(mn, pids) +} } diff --git a/man/parse_resource_map.Rd b/man/parse_resource_map.Rd index 2abf8d0..906f8a1 100644 --- a/man/parse_resource_map.Rd +++ b/man/parse_resource_map.Rd @@ -2,16 +2,29 @@ % Please edit documentation in R/packaging.R \name{parse_resource_map} \alias{parse_resource_map} -\title{Parse a Resource Map into a data.frame} +\title{Parse a resource map into a data.frame} \usage{ parse_resource_map(path) } \arguments{ -\item{path}{(character) Path to the resource map (an RDF/XML file)} +\item{path}{(character) Path to the resource map (an RDF/XML file).} } \value{ -(data.frame) The statements in the Resource Map +(data.frame) The statements in the resource map. } \description{ -Parse a Resource Map into a data.frame +Parse a resource map into a data.frame.
+} +\examples{ +\dontrun{ +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") + +rm_pid <- "resource_map_urn:uuid:6b2e5753-4a94-4e6f-971c-36420a446ecb" + +# Write resource map to file +writeBin(getObject(mn, rm_pid), "~/Documents/resource_map.rdf") +df <- parse_resource_map("~/Documents/resource_map.rdf") +} } diff --git a/man/path_join.Rd b/man/path_join.Rd deleted file mode 100644 index 8f74594..0000000 --- a/man/path_join.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{path_join} -\alias{path_join} -\title{(Intelligently) join (possibly redudant) path parts together.} -\usage{ -path_join(path_parts = c("")) -} -\arguments{ -\item{path_parts}{(character)} -} -\value{ -(character)The joined path string. -} -\description{ -Joins path strings like "./" to "./my/dir" as "./my/dir" instead of as -"././my/dir. -} diff --git a/man/pid_to_eml_entity.Rd b/man/pid_to_eml_entity.Rd new file mode 100644 index 0000000..cf1d575 --- /dev/null +++ b/man/pid_to_eml_entity.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{pid_to_eml_entity} +\alias{pid_to_eml_entity} +\title{Create EML entity with physical section from a DataONE PID} +\usage{ +pid_to_eml_entity(mn, pid, entity_type = "otherEntity", ...) +} +\arguments{ +\item{mn}{(MNode) Member Node where the PID is associated with an object.} + +\item{pid}{(character) The PID of the object to create the sub-tree for.} + +\item{entity_type}{(character) What kind of object to create from the input. One of "dataTable", +"spatialRaster", "spatialVector", "storedProcedure", "view", or "otherEntity".} + +\item{...}{(optional) Additional arguments to be passed to \code{eml$entityType()}.} +} +\value{ +(list) The entity object.
+} +\description{ +Create EML entity with physical section from a DataONE PID +} +\examples{ +\dontrun{ +# Generate EML otherEntity +pid_to_eml_entity(mn, + pid, + entity_type = "otherEntity", + entityName = "Entity Name", + entityDescription = "Description about entity") +} +} diff --git a/man/pid_to_eml_other_entity.Rd b/man/pid_to_eml_other_entity.Rd deleted file mode 100644 index d371226..0000000 --- a/man/pid_to_eml_other_entity.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{pid_to_eml_other_entity} -\alias{pid_to_eml_other_entity} -\title{eml.R} -\usage{ -pid_to_eml_other_entity(mn, pids) -} -\arguments{ -\item{mn}{(MNode) Member Node where the PID is associated with an object.} - -\item{pids}{(character) The PID of the object to create the sub-tree for.} -} -\value{ -(list of otherEntity) The otherEntity object(s) -} -\description{ -Helpers for creating EML. -Create EML otherEntity objects for a set of PIDs -} -\details{ -Note this is a wrapper around sysmeta_to_other_entity which handles the task of -creating the EML otherEntity. -} -\examples{ -\dontrun{ -# Generate EML otherEntity objects for all the data in a package -pkg <- get_package(mn, pid) -pid_to_other_entity(mn, pkg$data) -} -} diff --git a/man/pid_to_eml_physical.Rd b/man/pid_to_eml_physical.Rd index 3b8d353..623024e 100644 --- a/man/pid_to_eml_physical.Rd +++ b/man/pid_to_eml_physical.Rd @@ -4,24 +4,23 @@ \alias{pid_to_eml_physical} \title{Create EML physical objects for the given set of PIDs} \usage{ -pid_to_eml_physical(mn, pids) +pid_to_eml_physical(mn, pid) } \arguments{ \item{mn}{(MNode) Member Node where the PID is associated with an object.} -\item{pids}{(character) The PID of the object to create the sub-tree for.} +\item{pid}{(character) The PID of the object to create the physical for.} } \value{ -(list of otherEntity) The otherEntity object(s) +(list) A physical object. 
} \description{ -Note this is a wrapper around sysmeta_to_eml_physical which handles the task of -creating the EML physical +This is a wrapper around \code{\link[=sysmeta_to_eml_physical]{sysmeta_to_eml_physical()}} which handles the task of +creating the EML physical. } \examples{ \dontrun{ -# Generate EML physical objects for all the data in a package -pkg <- get_package(mn, pid) -pid_to_eml_physical(mn, pkg$data) +# Generate EML physical sections for an object in a data package +phys <- pid_to_eml_physical(mn, pid) } } diff --git a/man/pretty_print.Rd b/man/pretty_print.Rd deleted file mode 100644 index f000c7e..0000000 --- a/man/pretty_print.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modify_metadata.R -\name{pretty_print} -\alias{pretty_print} -\title{Uses XMLStarlet to pretty-print/beautify an XML document.} -\usage{ -pretty_print(path) -} -\arguments{ -\item{path}{Path to your file you want pretty-printed. (character)} -} -\value{ -Returns the result of the `system` command (0 = success) -} -\description{ -This command just runs `xmlstarlet path > path`, doing a simple -pretty-printing of the file located at `path`. -} -\details{ -Note that this function is doing an in-place pretty printing instead of -returning the pretty-printed text. - -Note that this command uses a temporary file as an intermediate step in the -pretty-printing process. For some reason, when running xmlstarlet from within -R, the same file can't be used as the input to `xmlstarlet format` and as the -shell redirection file (`> somefile.txt`). If you try to run `xmlstarlet -format` on the same file as you redirect to, you get a weird parse error from -xmlstarlet. 
-} diff --git a/man/publish_object.Rd b/man/publish_object.Rd index 0a603d8..f779ff2 100644 --- a/man/publish_object.Rd +++ b/man/publish_object.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/editing.R \name{publish_object} \alias{publish_object} -\title{editing.R} +\title{Publish an object on a Member Node} \usage{ publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL, clone_pid = NULL, public = TRUE) @@ -10,24 +10,35 @@ publish_object(mn, path, format_id = NULL, pid = NULL, sid = NULL, \arguments{ \item{mn}{(MNode) The Member Node to publish the object to.} -\item{path}{the path to the file to be published} +\item{path}{(character) The path to the file to be published.} -\item{format_id}{(character) Optional. The format ID to set for the object. When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} +\item{format_id}{(character) Optional. The format ID to set for the object. +When not set, \code{\link[=guess_format_id]{guess_format_id()}} will be used to guess the format ID. +Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} \item{pid}{(character) Optional. The PID to use with the object.} \item{sid}{(character) Optional. The SID to use with the new object.} -\item{clone_pid}{(character) PID of objet to clone System Metadata from} +\item{clone_pid}{(character) PID of object to clone System Metadata from.} + +\item{public}{(logical) Whether object should be given public read access.} } -\description{ -High-level functions for managing content. -Publish an object on a member node +\value{ +pid (character) The PID of the published object. } -\details{ -Use sensible defaults to publish an object on a member node. If identifier is provided, -use it, otherwise generate a UUID. If clone_id is provided, then retrieve the +\description{ +Use sensible defaults to publish an object on a Member Node. 
If identifier is provided, +use it, otherwise generate a UUID. If clone_id is provided, then retrieve the system metadata for that identifier and use it to provide rightsHolder, accessPolicy, and replicationPolicy metadata. Note that this function only uploads the object to the Member Node, and does not add it to a data package, which can be done separately. } +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +my_path <- "/home/Documents/myfile.csv" +pid <- publish_object(mn, path = my_path, format_id = "text/csv", public = FALSE) +} +} diff --git a/man/publish_update.Rd b/man/publish_update.Rd index efbcab7..fe1d1d2 100644 --- a/man/publish_update.Rd +++ b/man/publish_update.Rd @@ -2,13 +2,14 @@ % Please edit documentation in R/editing.R \name{publish_update} \alias{publish_update} -\title{Publish an updated data package.} +\title{Publish an updated data package} \usage{ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, child_pids = NULL, metadata_path = NULL, identifier = NULL, - use_doi = FALSE, parent_resmap_pid = NULL, parent_metadata_pid = NULL, - parent_data_pids = NULL, parent_child_pids = NULL, public = TRUE, - check_first = TRUE) + use_doi = FALSE, parent_resmap_pid = NULL, + parent_metadata_pid = NULL, parent_data_pids = NULL, + parent_child_pids = NULL, public = TRUE, check_first = TRUE, + format_id = NULL, keep_prov = FALSE) } \arguments{ \item{mn}{(MNode) The Member Node to update the object on.} @@ -21,46 +22,74 @@ publish_update(mn, metadata_pid, resource_map_pid, data_pids = NULL, \item{child_pids}{(character) Optional. Child packages resource map PIDs.} -\item{metadata_path}{(character) Optional. Path to a metadata file to update with. If this is not set, the existing metadata document will be used.} +\item{metadata_path}{(character or eml) Optional. An eml class object or a path to a metadata file to update with. 
+If this is not set, the existing metadata document will be used.} \item{identifier}{(character) Manually specify the identifier for the new metadata object.} \item{use_doi}{(logical) Generate and use a DOI as the identifier for the updated metadata object.} -\item{parent_resmap_pid}{(character) Optional. PID of a parent package to be updated.} +\item{parent_resmap_pid}{(character) Optional. PID of a parent package to be updated. +Not optional if a parent package exists.} -\item{parent_metadata_pid}{(character) Optional. Identifier for the metadata document of the parent package.} +\item{parent_metadata_pid}{(character) Optional. Identifier for the metadata document of the parent package. +Not optional if a parent package exists.} -\item{parent_data_pids}{(character) Optional. Identifier for the data objects of the parent package.} +\item{parent_data_pids}{(character) Optional. Identifier for the data objects of the parent package. +Not optional if the parent package contains data objects.} -\item{parent_child_pids}{(character) Optional. Resource map identifier(s) of child packages in the parent package.} +\item{parent_child_pids}{(character) Optional. Resource map identifier(s) of child packages in the parent package. +\code{resource_map_pid} should not be included. Not optional if the parent package contains other child packages.} -\item{public}{(logical) Optional. Make the update public. If FALSE, will set the metadata and resource map to private (but not the data objects). +\item{public}{(logical) Optional. Make the update public. If \code{FALSE}, will set the metadata and resource map to private (but not the data objects). This applies to the new metadata PID and its resource map and data object. access policies are not affected.} -\item{check_first}{(logical) Optional. Whether to check the PIDs passed in as aruments exist on the MN before continuing. Checks that objects exist and are of the right format type. 
This speeds up the function, especially when `data_pids` has many elements.} +\item{check_first}{(logical) Optional. Whether to check the PIDs passed in as arguments exist on the MN before continuing. +Checks that objects exist and are of the right format type. This speeds up the function, especially when \code{data_pids} has many elements.} -\item{parent_data_pids}{} +\item{format_id}{(character) Optional. When omitted, the updated object will have the same formatId as \code{metadata_pid}. If set, will attempt +to use the value instead. +@param keep_prov (logical) Option to force publish_update to keep prov} +} +\value{ +(character) Named character vector of PIDs in the data package, including PIDs for the metadata, resource map, and data objects. } \description{ -This function can be used for a variety of tasks: +Publish an update to a data package after updating data files or metadata. } \details{ +This function can be used for a variety of tasks: + \itemize{ - \item Publish an existing package with a DOI - \item Update a package with new data objects - \item Update a package with new metadata +\item Publish an existing package with a DOI +\item Update a package with new data objects +\item Update a package with new metadata } The metadata_pid and resource_map_pid provide the identifier of an EML metadata document and associated resource map, and the data_pids vector provides a list of PIDs of data objects in the package. Update the metadata file and resource map -by generating a new identifier (a DOI if use_doi is TRUE) and updating the Member +by generating a new identifier (a DOI if \code{use_doi = TRUE}) and updating the Member Node with a public version of the object. If metadata_file is not missing, it should be an edited version of the metadata to be used to update the original. 
If parent_resmap_pid is not missing, it indicates the PID of a parent package that -should be updated as well, using the parent_medata_pid, parent_data_pids, and +should be updated as well, using the parent_metadata_pid, parent_data_pids, and parent_child_pids as members of the updated package. In all cases, the objects are made publicly readable. } +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") + +rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") + +meta_path <- "/home/Documents/myMetadata.xml" + +publish_update(mn, meta_pid, rm_pid, data_pids, metadata_path = meta_path, public = TRUE) +} +} diff --git a/man/read_zip_shapefile.Rd b/man/read_zip_shapefile.Rd new file mode 100644 index 0000000..9dc85f0 --- /dev/null +++ b/man/read_zip_shapefile.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{read_zip_shapefile} +\alias{read_zip_shapefile} +\title{Read a shapefile from a pid} +\usage{ +read_zip_shapefile(mn, pid) +} +\arguments{ +\item{mn}{(MNode) A DataOne Member Node} + +\item{pid}{(character) An object identifier} +} +\value{ +shapefile (sf) The shapefile as an \code{sf} object +} +\description{ +Read a shapefile from a pid that points to the zipped directory of the shapefile and associated files +on a given member node.
+} +\examples{ +\dontrun{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +pid <- "urn:uuid:294a365f-c0d1-4cc3-a508-2e16260aa70c" + +shapefile <- read_zip_shapefile(adc, pid) +} +} +\author{ +Jeanette Clark jclark@nceas.ucsb.edu +} diff --git a/man/recover_failed_submission.Rd b/man/recover_failed_submission.Rd new file mode 100644 index 0000000..d438ab3 --- /dev/null +++ b/man/recover_failed_submission.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{recover_failed_submission} +\alias{recover_failed_submission} +\title{Recovers failed submissions} +\usage{ +recover_failed_submission(node, pid, path) +} +\arguments{ +\item{node}{(MNode) The Member Node to publish the object to.} + +\item{pid}{The PID of the EML metadata document to be recovered.} + +\item{path}{path to write XML.} +} +\value{ +recovers and writes the valid EML to the indicated path +} +\description{ +Recovers failed submissions and writes the new, valid EML to a given path +} +\examples{ +\dontrun{ +# Set environment +cn <- dataone::CNode("STAGING2") +mn <- dataone::getMNode(cn,"urn:node:mnTestKNB") +pid <- "urn:uuid:b1a234f0-eed5-4f58-b8d5-6334ce07c010" +path <- tempfile("file", fileext = ".xml") +recover_failed_submission(mn, pid, path) +eml <- EML::read_eml(path) +} +} +\author{ +Rachel Sun rachelsun@ucsb.edu +} diff --git a/man/recover_prov.Rd b/man/recover_prov.Rd new file mode 100644 index 0000000..a7722fd --- /dev/null +++ b/man/recover_prov.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/editing.R +\name{recover_prov} +\alias{recover_prov} +\title{Get a data.frame of prov statements from a resource map pid.} +\usage{ +recover_prov(mn, rm_pid) +} +\arguments{ +\item{mn}{(mn) A member node instance} + +\item{rm_pid}{(character) A resource map identifier} +} +\value{ +a data.frame of prov statements +} +\description{ +This is a function that is
useful if you need to recover lost prov statements. It returns +a data.frame of statements that can be passed to \code{update_resource_map} in the \code{other_statements} +argument. +} diff --git a/man/reformat_file_name.Rd b/man/reformat_file_name.Rd new file mode 100644 index 0000000..f2898ac --- /dev/null +++ b/man/reformat_file_name.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/editing.R +\name{reformat_file_name} +\alias{reformat_file_name} +\title{Helper for publish_object. Reformat the fileName in system metadata.} +\usage{ +reformat_file_name(path, sysmeta) +} +\arguments{ +\item{path}{(character) full file path} + +\item{sysmeta}{(S4) A system metadata object} +} +\description{ +Reformat the fileName field in an object's system metadata to follow Arctic Data Center +system metadata naming conventions. publish_object calls this function to rename +the fileName field in system metadata. +} diff --git a/man/remove_access.Rd b/man/remove_access.Rd new file mode 100644 index 0000000..221aad1 --- /dev/null +++ b/man/remove_access.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/access.R +\name{remove_access} +\alias{remove_access} +\title{Remove a subject from an object's access policy} +\usage{ +remove_access(mn, pids, subjects, permissions = c("read", "write", + "changePermission")) +} +\arguments{ +\item{mn}{(MNode) The Member Node.} + +\item{pids}{(character) The PIDs of the objects to set permissions for.} + +\item{subjects}{(character) The identifiers of the subjects to set permissions for, typically an ORCID or DN.} + +\item{permissions}{(character) Optional. The permissions to set. Defaults to +read, write, and changePermission.} +} +\value{ +(logical) Whether an update was needed. +} +\description{ +Remove the given subjects from the access policy for the given objects on the given Member Node.
+For each type of permission, this function checks if the permission is already set +and only updates the System Metadata when a change is needed. +} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +remove_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", + permissions = c("read", "write", "changePermission")) +} +} diff --git a/man/remove_public_access.Rd b/man/remove_public_access.Rd deleted file mode 100644 index 86a63bc..0000000 --- a/man/remove_public_access.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{remove_public_access} -\alias{remove_public_access} -\title{Remove all public read access rules from a System Metadata document} -\usage{ -remove_public_access(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The System Metadata document. to change.} -} -\value{ -(SystemMetadata) The potentially modified System Metadata document. -} -\description{ -Remove all public read access rules from a System Metadata document -} diff --git a/man/remove_public_read.Rd b/man/remove_public_read.Rd index a86c026..b78480b 100644 --- a/man/remove_public_read.Rd +++ b/man/remove_public_read.Rd @@ -2,15 +2,24 @@ % Please edit documentation in R/access.R \name{remove_public_read} \alias{remove_public_read} -\title{Remove public access on a set of objects.} +\title{Remove public read access for an object} \usage{ remove_public_read(mn, pids) } \arguments{ -\item{mn}{(MNode)} +\item{mn}{(MNode) The Member Node.} -\item{pids}{(character) A vector of PIDs to set public access on} +\item{pids}{(character) The PIDs of the objects to remove public read access for.} } \description{ -Remove public access on a set of objects. +Remove public read access for an object. 
+} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +remove_public_read(mn, pids) +} } diff --git a/man/reorder_pids.Rd b/man/reorder_pids.Rd new file mode 100644 index 0000000..313e6f0 --- /dev/null +++ b/man/reorder_pids.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{reorder_pids} +\alias{reorder_pids} +\title{Reorder a named list of objects according to the order in the metadata} +\usage{ +reorder_pids(pid_list, doc) +} +\arguments{ +\item{pid_list}{(list) A named list of data pids} + +\item{doc}{(list) an \code{emld} document} +} +\value{ +ordered_pids (list) A list of reordered pids +} +\description{ +This function takes a named list of data objects, such as what is +returned from \code{get_package}, and reorders them according to the order +they are given in the EML document. 
+} +\examples{ +\dontrun{ +cn <- dataone::CNode('PROD') +adc <- dataone::getMNode(cn,'urn:node:ARCTIC') +ids <- get_package(adc, 'resource_map_doi:10.18739/A2S17SS1M', file_names = TRUE) +doc <- EML::read_eml(dataone::getObject(adc, ids$metadata)) + +# return all entity types +ordered_pids <- reorder_pids(ids$data, doc) +} + +} diff --git a/man/replace_package_id.Rd b/man/replace_package_id.Rd deleted file mode 100644 index fdd67b2..0000000 --- a/man/replace_package_id.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{replace_package_id} -\alias{replace_package_id} -\title{Replace the EML 'packageId' attribute on the root element with a -certain value.} -\usage{ -replace_package_id(path, replacement) -} -\arguments{ -\item{path}{(character) Path to the XML file to edit.} - -\item{replacement}{(character) The new value.} -} -\description{ -Replace the EML 'packageId' attribute on the root element with a -certain value. -} diff --git a/man/replace_subject.Rd b/man/replace_subject.Rd deleted file mode 100644 index 963b5c2..0000000 --- a/man/replace_subject.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sysmeta.R -\name{replace_subject} -\alias{replace_subject} -\title{Replace subjects in the accessPolicy section of a System Metadata entries.} -\usage{ -replace_subject(sysmeta, from = "cn=arctic-data-admins,dc=dataone,dc=org", - to = "CN=arctic-data-admins,DC=dataone,DC=org") -} -\arguments{ -\item{sysmeta}{(SystemMetadata) The System Metadata object.} - -\item{from}{(character) The DN string to replace.} - -\item{to}{(character) The DN string to put in place of `from`.} -} -\value{ -The modified System Metadata (SystemMetadata) -} -\description{ -This function was written out to fix capitalization errors but in a set of -existing System Metadata entries but can be used to replace any subject. 
-} diff --git a/man/set_abstract.Rd b/man/set_abstract.Rd index 52c91ca..044bec3 100644 --- a/man/set_abstract.Rd +++ b/man/set_abstract.Rd @@ -2,25 +2,21 @@ % Please edit documentation in R/eml.R \name{set_abstract} \alias{set_abstract} -\title{Set the abstract on an EML document} +\title{Set the abstract for an EML document} \usage{ set_abstract(doc, text) } \arguments{ -\item{doc}{(eml) An EML document} +\item{doc}{(eml) An EML document.} \item{text}{(character) The abstract text. If \code{text} is length one, an -abstract without \code{<para>} or \code{section} elements will be created. +abstract without \code{<para>} or \code{<section>} elements will be created. If \code{text} is greater than one in length, \code{para} elementes will be used for each element.} } \value{ -(eml) The modified EML document +(eml) The modified EML document. } \description{ -Set the abstract on an EML document -} -\examples{ -set_abstract(doc, c("Test abstract...")) -set_abstract(doc, c("First para", "second para")) +Set the abstract for an EML document. } diff --git a/man/set_access.Rd b/man/set_access.Rd index ac4a37d..eff2dc5 100644 --- a/man/set_access.Rd +++ b/man/set_access.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/access.R \name{set_access} \alias{set_access} -\title{Set the access policy for a set of objects.} +\title{Set the access policy for an object} \usage{ set_access(mn, pids, subjects, permissions = c("read", "write", "changePermission")) @@ -10,16 +10,28 @@ set_access(mn, pids, subjects, permissions = c("read", "write", \arguments{ \item{mn}{(MNode) The Member Node.} -\item{pids}{(character) The object(s) to set the permissions on.} +\item{pids}{(character) The PIDs of the objects to set permissions for.} -\item{subjects}{(character) The subject(s) to set permissions for.} +\item{subjects}{(character) The identifiers of the subjects to set permissions for, typically an ORCID or DN.} -\item{permissions}{(character) Optional. Vector of permissions.} +\item{permissions}{(character) Optional. The permissions to set. Defaults to +read, write, and changePermission.} } \value{ -(logical) Named +(logical) Whether an update was needed. } \description{ -For each permission, this function checks if the permission is already set -and moves on. System Metadata are only updated when a change was needed. +Set the access policy for the given subjects for the given objects on the given Member Node. +For each type of permission, this function checks if the permission is already set +and only updates the System Metadata when a change is needed.
+} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +set_access(mn, pids, subjects = "http://orcid.org/0000-000X-XXXX-XXXX", + permissions = c("read", "write", "changePermission")) +} } diff --git a/man/set_file_name.Rd b/man/set_file_name.Rd index f03b1d3..d1166e9 100644 --- a/man/set_file_name.Rd +++ b/man/set_file_name.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/editing.R \name{set_file_name} \alias{set_file_name} -\title{Set the file name on an object} +\title{Set the file name for an object} \usage{ set_file_name(mn, pid, name) } @@ -14,8 +14,17 @@ set_file_name(mn, pid, name) \item{name}{(character) The file name.} } \value{ -(logical) Whether the update succeeded, FALSE means there was an error. +(logical) Whether the update succeeded. } \description{ -Set the file name on an object +Set the file name for an object. +} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn, "urn:node:mnTestKNB") + +pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +set_file_name(mn, pid, "myfile.csv") +} } diff --git a/man/set_other_entities.Rd b/man/set_other_entities.Rd deleted file mode 100644 index a5c9f40..0000000 --- a/man/set_other_entities.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{set_other_entities} -\alias{set_other_entities} -\title{Creates and sets EML otherEntity elements to an existing EML document, -replacing any existing otherEntities} -\usage{ -set_other_entities(mn, path, pids) -} -\arguments{ -\item{mn}{(MNode) The Member Node the objects exist on.} - -\item{path}{(character) The location on disk of the EML file.} - -\item{pids}{(character) One or more PIDs for the objects.} -} -\value{ -(character) The path to the updated EML file. 
-} -\description{ -This function is slow because it needs get the System Metadata for each -element of `pids` in order to get the fileName, checksum, etc. -} -\examples{ -\dontrun{ -mn <- MNode(...) # Set up a connection to an MN -eml_path <- "/path/to/your/eml.xml" -set_other_entities(mn, eml_path, "a_data_pid") -} -} diff --git a/man/set_public_read.Rd b/man/set_public_read.Rd index 381024e..8cee05b 100644 --- a/man/set_public_read.Rd +++ b/man/set_public_read.Rd @@ -2,15 +2,27 @@ % Please edit documentation in R/access.R \name{set_public_read} \alias{set_public_read} -\title{Set public access on a set of objects.} +\title{Set public read access for an object} \usage{ set_public_read(mn, pids) } \arguments{ -\item{mn}{(MNode)} +\item{mn}{(MNode) The Member Node.} -\item{pids}{(character) A vector of PIDs to set public access on} +\item{pids}{(character) The PIDs of the objects to set public read access for.} +} +\value{ +(logical) Whether an update was needed. } \description{ -Set public access on a set of objects. +Set public read access for an object. 
+} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +set_public_read(mn, pids) +} } diff --git a/man/set_public_read_all_versions.Rd b/man/set_public_read_all_versions.Rd new file mode 100644 index 0000000..4f2e196 --- /dev/null +++ b/man/set_public_read_all_versions.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{set_public_read_all_versions} +\alias{set_public_read_all_versions} +\title{Set public READ access on all versions of PIDs in a data package.} +\usage{ +set_public_read_all_versions(mn, resource_map_pid) +} +\arguments{ +\item{mn}{(MNode) The Member Node to query.} + +\item{resource_map_pid}{(character) The resource map identifier (PID).} +} +\description{ +Set public READ access on all versions of PIDs in a data package. +} +\examples{ +\dontrun{ +cn_staging <- CNode('STAGING') +adc_test <- getMNode(cn_staging,'urn:node:mnTestARCTIC') +# Create a dummy package then create another version with 'publish_update()' +pkg <- create_dummy_package(adc_test) +remove_public_read(adc_test, unlist(pkg)) +pkg_v2 <- publish_update(adc_test, pkg$metadata, pkg$resource_map, pkg$data, public = FALSE) +# Set public read on all versions +set_public_read_all_versions(adc_test, pkg$resource_map) +} +} diff --git a/man/set_rights_and_access.Rd b/man/set_rights_and_access.Rd index c9efa3d..45266cf 100644 --- a/man/set_rights_and_access.Rd +++ b/man/set_rights_and_access.Rd @@ -2,26 +2,36 @@ % Please edit documentation in R/access.R \name{set_rights_and_access} \alias{set_rights_and_access} -\title{Set the given subject as the rightsHolder and subject with write and -changePermission access for the given PID.} +\title{Set rights holder with access policy for an object} \usage{ set_rights_and_access(mn, pids, subject, permissions = c("read", "write", "changePermission")) }
\arguments{ -\item{mn}{(MNode) The Member Node to send the query.} +\item{mn}{(MNode) The Member Node.} -\item{pids}{(character) The PID(s) to set the access rule for.} +\item{pids}{(character) The PIDs of the objects to set the rights holder and access policy for.} -\item{subject}{(character)The subject of the rule(s).} +\item{subject}{(character) The identifier of the new rights holder, typically an ORCID or DN.} -\item{permissions}{(character) The permissions for the rule. Defaults to +\item{permissions}{(character) Optional. The permissions to set. Defaults to read, write, and changePermission.} } \value{ -Whether an update was needed. +(logical) Whether an update was needed. } \description{ -This function only updates the existing System Metadata if a change is -needed. +Set the given subject as the rights holder and with given permissions +for the given objects. This function only updates the existing +System Metadata when a change is needed. +} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", + "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +set_rights_and_access(mn, pids, "http://orcid.org/0000-000X-XXXX-XXXX", + permissions = c("read", "write", "changePermission")) +} } diff --git a/man/set_rights_holder.Rd b/man/set_rights_holder.Rd index 09b59e0..a162643 100644 --- a/man/set_rights_holder.Rd +++ b/man/set_rights_holder.Rd @@ -2,23 +2,31 @@ % Please edit documentation in R/access.R \name{set_rights_holder} \alias{set_rights_holder} -\title{access.R} +\title{Set the rights holder for an object} \usage{ set_rights_holder(mn, pids, subject) } \arguments{ -\item{mn}{(MNode) The MNode instance to be changed.} +\item{mn}{(MNode) The Member Node.} -\item{pids}{(character) The identifiers for the object to be changed.} +\item{pids}{(character) The PIDs of the objects to set the rights holder for.} -\item{subject}{(character) The identifier of the new 
rightsHolder, often an ORCID or DN.} +\item{subject}{(character) The identifier of the new rights holder, typically an ORCID or DN.} +} +\value{ +(logical) Whether an update was needed. } \description{ -High-level utility functions for getting and setting access rules for DataONE -objects. -Set the rightsHolder field for a given PID. +Set the rights holder to the given subject for the given objects on the +given Member Node. This function checks if the rights holder is already set +and only updates the System Metadata when a change is needed. +} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") +set_rights_holder(mn, pids, subject = "http://orcid.org/0000-000X-XXXX-XXXX") } -\details{ -Update the rights holder to the provided subject for the object identified in -the provided system metadata document on the given Member Node. } diff --git a/man/show_indexing_status.Rd b/man/show_indexing_status.Rd index bb922ce..45d828e 100644 --- a/man/show_indexing_status.Rd +++ b/man/show_indexing_status.Rd @@ -7,21 +7,20 @@ show_indexing_status(mn, pids) } \arguments{ -\item{mn}{(MNode) The Member Node to query} +\item{mn}{(MNode) The Member Node to query.} -\item{pids}{(character|list) One or more PIDs (or list of PIDs)} +\item{pids}{(character/list) One or more PIDs.} } \value{ -Nothing +\code{NULL} } \description{ -Show the indexing status of a set of PIDs +Show the indexing status of a set of PIDs. } \examples{ \dontrun{ # Create a package then check its indexing status library(dataone) -library(arcticdatautils) mn <- MNode(...)
pkg <- create_dummy_package(mn) show_indexing_status(mn, pkg) diff --git a/man/show_random_dataset.Rd b/man/show_random_dataset.Rd deleted file mode 100644 index 429ecaa..0000000 --- a/man/show_random_dataset.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{show_random_dataset} -\alias{show_random_dataset} -\title{Print a random dataset.} -\usage{ -show_random_dataset(inventory, theme = NULL, n = 10) -} -\arguments{ -\item{inventory}{(data.frame) An inventory.} - -\item{theme}{(character) Optional. A package theme name.} - -\item{n}{(numeric) Optional. The number of files to show.} -} -\value{ -Nothing. -} -\description{ -Print a random dataset. -} diff --git a/man/substitute_eml_party.Rd b/man/substitute_eml_party.Rd deleted file mode 100644 index ce89ef5..0000000 --- a/man/substitute_eml_party.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{substitute_eml_party} -\alias{substitute_eml_party} -\title{Extract the EML responsible-party blocks in a document, and parse the -surName field to create proper givenName/surName structure} -\usage{ -substitute_eml_party(path) -} -\arguments{ -\item{path}{file path to the EML document to process (character)} -} -\value{ -path (character) Path to the converted EML file. 
-} -\description{ -Extract the EML responsible-party blocks in a document, and parse the -surName field to create proper givenName/surName structure -} diff --git a/man/sysmeta_to_eml_other_entity.Rd b/man/sysmeta_to_eml_other_entity.Rd deleted file mode 100644 index a3d094d..0000000 --- a/man/sysmeta_to_eml_other_entity.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{sysmeta_to_eml_other_entity} -\alias{sysmeta_to_eml_other_entity} -\title{Create an EML otherEntity for the given object from the System Metadata} -\usage{ -sysmeta_to_eml_other_entity(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata) One or more System Metadata objects} -} -\value{ -(list of otherEntity) The otherEntity object(s) -} -\description{ -Create an EML otherEntity for the given object from the System Metadata -} -\examples{ -\dontrun{ -# Generate EML otherEntity objects for all the data in a package -pkg <- get_package(mn, pid) -sm <- lapply(pkg$data, function(pid) { getSystemMetadata(mn, pid) }) -sysmeta_to_other_entity(sm) -} -} diff --git a/man/sysmeta_to_eml_physical.Rd b/man/sysmeta_to_eml_physical.Rd index ee0772f..60ededf 100644 --- a/man/sysmeta_to_eml_physical.Rd +++ b/man/sysmeta_to_eml_physical.Rd @@ -2,26 +2,25 @@ % Please edit documentation in R/eml.R \name{sysmeta_to_eml_physical} \alias{sysmeta_to_eml_physical} -\title{Create an EML physical object from System Metadata} +\title{Create an EML physical object from system metadata} \usage{ sysmeta_to_eml_physical(sysmeta) } \arguments{ -\item{sysmeta}{(SystemMetadata) One or more System Metadata objects} +\item{sysmeta}{(SystemMetadata) One or more System Metadata objects.} } \value{ -(list of physical) The physical objects for each sysmeta +(list) A list of physical objects. } \description{ This function creates a pre-canned EML physical object from what's in the -System Metadata of an Object. 
Note that it sets an Online Distrubtion URL +System Metadata of an object. Note that it sets an Online Distribution URL of the DataONE v2 resolve service for the PID. } \examples{ -#' \dontrun { -# Generate EML physical objects for all the data in a package -pkg <- get_package(mn, pid) -sm <- lapply(pkg$data, function(pid) { getSystemMetadata(mn, pid) }) +\dontrun{ +# Generate EML physical object from a system metadata object +sm <- getSystemMetadata(mn, pid) sysmeta_to_eml_physical(sm) } } diff --git a/man/sysmeta_to_other_entity.Rd b/man/sysmeta_to_other_entity.Rd deleted file mode 100644 index 4ef8a59..0000000 --- a/man/sysmeta_to_other_entity.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/eml.R -\name{sysmeta_to_other_entity} -\alias{sysmeta_to_other_entity} -\alias{sysmeta_to_other_entity} -\title{This function is deprecated. See \link{pid_to_other_eml_entity}.} -\usage{ -sysmeta_to_other_entity(sysmeta) - -sysmeta_to_other_entity(sysmeta) -} -\arguments{ -\item{sysmeta}{(SystemMetadata)} - -\item{mn}{(MNode)} - -\item{pids}{(character)} -} -\description{ -This function is deprecated. See \link{pid_to_other_eml_entity}. - -This function is deprecated. See \link{sysmeta_to_eml_other_entity}. -} diff --git a/man/test_has_abstract.Rd b/man/test_has_abstract.Rd deleted file mode 100644 index 8c63190..0000000 --- a/man/test_has_abstract.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modify_metadata.R -\name{test_has_abstract} -\alias{test_has_abstract} -\title{modify_metadata.R -Author: Bryce Mecum } -\usage{ -test_has_abstract(path) -} -\description{ -Functions related to fixing invalid ISO metadata. -} -\details{ -Some functions just test whether a validation issue is present. These are -prefixed with the text "test". Exactly what they are testing should be -described in the docstrings. 
- -Other functons fix the bad metadata in place (modifying the original file) -and these functions are prefixed with "fix_". Exactly what they are fixing -should be described in the docstrings. - -Example usage: - -# Find and fix documents in 'mydir' that have extra whitespace in their -# topicCategory element(s) - -the_files <- dir(mydir) -bad_enums <- the_files[which(sapply(the_files, test_has_bad_enum))] -} diff --git a/man/theme_packages.Rd b/man/theme_packages.Rd deleted file mode 100644 index 1d391ad..0000000 --- a/man/theme_packages.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/marking.R -\name{theme_packages} -\alias{theme_packages} -\title{marking.R -Author: Bryce Mecum } -\usage{ -theme_packages(inventory, nfiles_cutoff = 100) -} -\arguments{ -\item{inventory}{(data.frame) An Inventory.} -} -\value{ -(data.frame) An Inventory. -} -\description{ -R commands for marking datasets before adding. -Divide packages and their files into themes. -} -\details{ -Themes divide packages into groups based upon how the actions we will take -to insert them. Packages are divided into one of three themes: - -"many-files" - -The package has more files than we'd like to include in a Resource Map - and we will want to archive its contents before inserting. - -"has-versions" - -The package has version information embedded into its folder structure. - These packages will be hand-verified and inserted manually when a plan - is developed to insert them. - -"ready-to-go" - -All other packages not in the above themes. - -Note: Adds a 'theme' column to 'inventory'. 
-Note: Depeneds on the following columns: - -- filename - - package_nfiles -} diff --git a/man/update_object.Rd b/man/update_object.Rd index 0dd8cb3..edac8a0 100644 --- a/man/update_object.Rd +++ b/man/update_object.Rd @@ -2,9 +2,10 @@ % Please edit documentation in R/editing.R \name{update_object} \alias{update_object} -\title{Update an object with a new file.} +\title{Update an object with a new file} \usage{ -update_object(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL) +update_object(mn, pid, path, format_id = NULL, new_pid = NULL, + sid = NULL) } \arguments{ \item{mn}{(MNode) The Member Node to update the object on.} @@ -13,13 +14,29 @@ update_object(mn, pid, path, format_id = NULL, new_pid = NULL, sid = NULL) \item{path}{(character) The full path to the file to update with.} -\item{format_id}{(character) Optional. The format ID to set for the object. When not set, \code{\link{guess_format_id}} will be used to guess the format ID. Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} +\item{format_id}{(character) Optional. The format ID to set for the object. +When not set, \code{\link[=guess_format_id]{guess_format_id()}} will be used to guess the format ID. +Should be a \href{https://cn.dataone.org/cn/v2/formats}{DataONE format ID}.} + +\item{new_pid}{(character) Optional. Specify the PID for the new object. +Defaults to automatically generating a new, random UUID-style PID.} + +\item{sid}{(character) Optional. Specify a Series ID (SID) to use for the new object.} } \value{ (character) The PID of the updated object. } \description{ -This is a convenience wrapper around `dataone::updateObject` which copies in +This is a convenience wrapper around \code{\link[dataone:updateObject]{dataone::updateObject()}} which copies in fields from the old object's System Metadata such as the rightsHolder and accessPolicy and updates only what needs to be changed. 
} +\examples{ +\dontrun{ +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") +pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +my_path <- "/home/Documents/myfile.csv" +new_pid <- update_object(mn, pid, my_path, format_id = "text/csv") +} +} diff --git a/man/update_package.Rd b/man/update_package.Rd deleted file mode 100644 index 83c3b04..0000000 --- a/man/update_package.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{update_package} -\alias{update_package} -\title{Update a package with modified metadata.} -\usage{ -update_package(inventory, package, env = NULL) -} -\arguments{ -\item{inventory}{(data.frame)} - -\item{package}{(character)} -} -\value{ -TRUE or FALSE depending on sucess (logical) -} -\description{ -The modified metadata should be set in the `env` variable. For example, if -your original metadata is: -} -\details{ -/home/you/originals/dir/a.xml - -and your modified metadata is in - -/home/someone_else/modified/dir/a.xml - -Then your env should be: - -env$base_path <- "/home/you/" -env$alternate_path <- "/home/someone_else" - -Note that the data files are not updated either so all that's happening is -the metadata object and resource map are being updated. - -Note that this function checks if the old objects (metadata and resource map) -exist on the Member Node before doing their work and will call createObject() -instead of updateObject() if the object didn't already exist. 
-} diff --git a/man/update_resource_map.Rd b/man/update_resource_map.Rd index 59e4351..e65bc95 100644 --- a/man/update_resource_map.Rd +++ b/man/update_resource_map.Rd @@ -2,45 +2,72 @@ % Please edit documentation in R/editing.R \name{update_resource_map} \alias{update_resource_map} -\title{Update an existing resource map Object on a Member Node.} +\title{Update an existing resource map object on a Member Node} \usage{ update_resource_map(mn, resource_map_pid, metadata_pid, data_pids = NULL, child_pids = NULL, other_statements = NULL, identifier = NULL, - public = FALSE, check_first = TRUE) + public = TRUE, check_first = TRUE, keep_prov = FALSE) } \arguments{ -\item{other_statements}{(data.frame) Extra statements to add to the Resource Map.} +\item{mn}{(MNode) The Member Node.} -\item{identifier}{} +\item{resource_map_pid}{(character) The PID of the resource map to be updated.} -\item{public}{Whether or not to make the new resource map public read -(logical)} +\item{metadata_pid}{(character) The PID of the metadata object to go in the package.} + +\item{data_pids}{(character) The PID(s) of the data objects to go in the package.} + +\item{child_pids}{(character) The resource map PIDs of the packages to be +nested under the package.} + +\item{other_statements}{(data.frame) Extra statements to add to the resource map.} + +\item{identifier}{(character) Manually specify the identifier for the new metadata object.} + +\item{public}{(logical) Whether or not to make the new resource map public read.} \item{check_first}{(logical) Optional. Whether to check the PIDs passed in as -aruments exist on the MN before continuing. This speeds up the function, -especially when `data_pids` has many elements.} +arguments exist on the MN before continuing. 
This speeds up the function, +especially when \code{data_pids} has many elements.} + +\item{keep_prov}{(logical) Option to force prov to be forwarded into the new resource map. Defaults to \code{FALSE}.} +} +\value{ +(character) The PID of the updated resource map. } \description{ This function first generates a new resource map RDF/XML document locally and -then uses the dataone::updateObject function to update an Object on the +then uses the \code{\link[dataone:updateObject]{dataone::updateObject()}} function to update an object on the specified MN. } \details{ -If you only want to generate resource map RDF/XML, see -\code{\link{generate_resource_map}}. +If you only want to generate resource map RDF/XML, see \code{\link[=generate_resource_map]{generate_resource_map()}}. -This function also can be used to be used to add a new child packages to a -parent package. For exmaple, if you have: +This function also can be used to add new child packages to a +parent package. For example, if you have: Parent A B -and want to add C as a sibling package to A and B, e.g. +and want to add C as a sibling package to A and B, e.g.: Parent A B C -you could use this function. +then you could use this function. -Note: This function currently replaces the rightsHolder on the Resource Map +Note: This function currently replaces the rightsHolder on the resource map temporarily to allow updating but sets it back to the rightsHolder that was in place before the update.
} +\examples{ +\dontrun{ +cn <- CNode('STAGING2') +mn <- getMNode(cn,"urn:node:mnTestKNB") + +rm_pid <- "resource_map_urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +meta_pid <- "urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe" +data_pids <- c("urn:uuid:3e5307c4-0bf3-4fd3-939c-112d4d11e8a1", +"urn:uuid:23c7cae4-0fc8-4241-96bb-aa8ed94d71fe") + +rm_new <- update_resource_map(mn, rm_pid, meta_pid, data_pids) +} +} diff --git a/man/validate_environment.Rd b/man/validate_environment.Rd deleted file mode 100644 index c8127bd..0000000 --- a/man/validate_environment.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{validate_environment} -\alias{validate_environment} -\title{Validate an environment.} -\usage{ -validate_environment(env) -} -\arguments{ -\item{env}{} -} -\description{ -Validate an environment. -} diff --git a/man/validate_inventory.Rd b/man/validate_inventory.Rd deleted file mode 100644 index 83b84d0..0000000 --- a/man/validate_inventory.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/packaging.R -\name{validate_inventory} -\alias{validate_inventory} -\title{Validate an Inventory.} -\usage{ -validate_inventory(inventory) -} -\arguments{ -\item{inventory}{} -} -\description{ -Validate an Inventory. -} diff --git a/man/view_packages.Rd b/man/view_packages.Rd deleted file mode 100644 index 632ac5e..0000000 --- a/man/view_packages.Rd +++ /dev/null @@ -1,12 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/interactive.R -\name{view_packages} -\alias{view_packages} -\title{interactive.R -Author: Bryce Mecum } -\usage{ -view_packages(inventory) -} -\description{ -Functions for interactive viewing of the Inventory and other objects. 
-} diff --git a/man/view_profile.Rd b/man/view_profile.Rd index 38ab472..605fd21 100644 --- a/man/view_profile.Rd +++ b/man/view_profile.Rd @@ -10,28 +10,30 @@ view_profile(mn, subject, fields = c("identifier", "title")) \item{mn}{(MNode) The Member Node to query.} \item{subject}{(character) The subject to find the datasets for. This is -likely going to be your ORCID, e.g. http://orcid.org....} +likely going to be an ORCID, e.g. http://orcid.org....} \item{fields}{(character) A vector of Solr fields to return.} } \value{ -(data.frame) data.frame with the results. +(data.frame) A data.frame with the results. } \description{ -This function is intended to be (poorly) simulate what a user sees when they +This function is intended to (poorly) simulate what a user sees when they browse to their "My Data Sets" page (their #profile URL). It uses a similar -Solr to what Metacat UI uses to generate the list. The results of this -function may be the same as what's on the #profile page but may be missing -some of the user's datasets when: +Solr query to what Metacat UI uses to generate the list. } \details{ -- The user has any datasets in their #profile that the person running the +The results of this function may be the same as what's on the #profile page +but may be missing some of the user's datasets when: +\itemize{ +\item The user has any datasets in their #profile that the person running the query (you) can't \code{read}. This is rare on arcticdata.io but possible because arctic-data-admins usually has read/write/changePermission permissions on every object. -- The user has datasets owned by an Equivalent Identity of the \code{subject} +\item The user has datasets owned by an Equivalent Identity of the \code{subject} being queried. This is rare, especially on arcticdata.io. } +} \examples{ \dontrun{ options(...set...your...token....) 
@@ -41,5 +43,12 @@ profile(mn, me) // Get a custom set of fields view_profile(mn, me, "origin") + +# Set environment +cn <- CNode("STAGING2") +mn <- getMNode(cn,"urn:node:mnTestKNB") + +package_df <- view_profile(mn, "http://orcid.org/0000-0003-4703-1974", fields = c("title")) + } } diff --git a/man/warn_current_version.Rd b/man/warn_current_version.Rd deleted file mode 100644 index e9ff0a7..0000000 --- a/man/warn_current_version.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{warn_current_version} -\alias{warn_current_version} -\title{Warns if the currently-installed version of the package is not the same -version as the latest release on GitHub.} -\usage{ -warn_current_version() -} -\description{ -Warns if the currently-installed version of the package is not the same -version as the latest release on GitHub. -} diff --git a/man/which_in_eml.Rd b/man/which_in_eml.Rd new file mode 100644 index 0000000..85ab3d2 --- /dev/null +++ b/man/which_in_eml.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{which_in_eml} +\alias{which_in_eml} +\title{Search through EMLs} +\usage{ +which_in_eml(doc, element, test) +} +\arguments{ +\item{doc}{(list) An EML object.} + +\item{element}{(character) Element to evaluate.} + +\item{test}{(function/character) A function to evaluate (see examples). If test is a character, +will evaluate if \code{element == test} (see example 1).} +} +\description{ +This function returns indices within an EML list that contain an instance where +\code{test == TRUE}. See examples for more information. +} +\examples{ +\dontrun{ +# Question: Which creators have a surName "Smith"? 
+n <- which_in_eml(eml$dataset$creator, "surName", "Smith") +# Answer: eml$dataset$creator[n] + +# Question: Which dataTables have an entityName that begins with "2016"? +n <- which_in_eml(eml$dataset$dataTable, "entityName", function(x) {grepl("^2016", x)}) +# Answer: eml$dataset$dataTable[n] + +# Question: Which attributes in dataTable[[1]] have a numberType "natural"? +n <- which_in_eml(eml$dataset$dataTable[[1]]$attributeList$attribute, "numberType", "natural") +# Answer: eml$dataset$dataTable[[1]]$attributeList$attribute[n] + +# Question: Which dataTables have at least one attribute with a numberType "natural"? +n <- which_in_eml(eml$dataset$dataTable, "numberType", function(x) {"natural" \%in\% x}) +# Answer: eml$dataset$dataTable[n] +} +} +\author{ +Mitchell Maier mitchell.maier@gmail.com +} diff --git a/tests/testthat/test_access.R b/tests/testthat/test_access.R index 4a53870..a30ad06 100644 --- a/tests/testthat/test_access.R +++ b/tests/testthat/test_access.R @@ -1,6 +1,6 @@ -context("access") +context("Access rules") -mn <- env_load()$mn +mn <- tryCatch(env_load()$mn, error = function(e) env_load()$mn) test_that("get_package works for a simple package", { if (!is_token_set(mn)) { @@ -8,11 +8,13 @@ test_that("get_package works for a simple package", { } pkg <- create_dummy_package(mn) - get_pkg <- get_package(mn, pkg$metadata) + Sys.sleep(1) + get_pkg <- get_package(mn, pkg$resource_map) expect_true(pkg$metadata == get_pkg$metadata) expect_true(pkg$resource_map == get_pkg$resource_map) expect_true(pkg$data == get_pkg$data) + expect_warning(get_package(mn, pkg$metadata)) }) test_that("get_package works for a package with a child package", { @@ -59,8 +61,37 @@ test_that("get_package works the same when given a metadata pid as it does when } child_pkg <- create_dummy_package(mn) - a <- get_package(mn, child_pkg$metadata) + a <- suppressWarnings(get_package(mn, child_pkg$metadata)) b <- get_package(mn, child_pkg$resource_map) expect_equal(a, b) }) + 
+test_that("access functions stop if system metadata is not found", { + expect_error(set_rights_holder(mn, "test", "http://orcid.org/0000-000X-XXXX-XXXX")) + + expect_error(set_access(mn, "test", "http://orcid.org/0000-000X-XXXX-XXXX")) + + expect_error(remove_access(mn, "test", "http://orcid.org/0000-000X-XXXX-XXXX")) + + expect_error(set_public_read(mn, "test")) + + expect_error(remove_public_read(mn, "test")) + + expect_error(set_rights_and_access(mn, "test", "http://orcid.org/0000-000X-XXXX-XXXX")) +}) + +test_that("is_public_read returns true for public packages and false for private packages", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pkg <- create_dummy_package(mn, size = 3) + + public_response <- is_public_read(mn, pkg$resource_map) + remove_public_read(mn, pkg$resource_map) + private_response <- is_public_read(mn, pkg$resource_map) + + expect_true(public_response) + expect_false(private_response) +}) diff --git a/tests/testthat/test_dataone.R b/tests/testthat/test_dataone.R index f005812..6caa830 100644 --- a/tests/testthat/test_dataone.R +++ b/tests/testthat/test_dataone.R @@ -1,6 +1,6 @@ -context("dataone") +context("Helpers for the dataone package") -node <- env_load()$node +node <- env_load()$mn test_that("permissions can be checked", { if (!is_token_set(node)) { diff --git a/tests/testthat/test_dataone_formats.R b/tests/testthat/test_dataone_formats.R deleted file mode 100644 index 5dc6ce0..0000000 --- a/tests/testthat/test_dataone_formats.R +++ /dev/null @@ -1,7 +0,0 @@ -context("formats") - -test_that("a format can be returned", { - fmt <- format_eml() - expect_is(fmt, "character") - expect_gt(length(fmt), 0) -}) diff --git a/tests/testthat/test_editing.R b/tests/testthat/test_editing.R index 458a4db..c2ab42b 100644 --- a/tests/testthat/test_editing.R +++ b/tests/testthat/test_editing.R @@ -1,4 +1,4 @@ -context("editing") +context("Editing and managing data packages") mn <- env_load()$mn @@ -15,7 +15,8 @@ 
test_that("we can publish an update", { update <- publish_update(mn, package$metadata, package$resource_map, - package$data) + package$data, + check_first = FALSE) expect_named(update, c("metadata", "resource_map", "data")) expect_true(all(object_exists(mn, unlist(update)))) @@ -36,7 +37,8 @@ test_that("an identifier can be manually specified when publishing an update", { package$metadata, package$resource_map, package$data, - identifier = new_identifier) + identifier = new_identifier, + check_first = FALSE) expect_equal(update$metadata, new_identifier) }) @@ -53,7 +55,7 @@ test_that("we can create a resource map", { response <- create_resource_map(mn, metadata_pid, data_pid) expect_true(object_exists(mn, response)) - expect_equal(response, get_package(mn, metadata_pid)$resource_map) + expect_equal(response, get_package(mn, response)$resource_map) }) @@ -121,7 +123,7 @@ test_that("SIDs are maintained when publishing an update to an object with a SID sid = new_sid) resmap_pid <- create_resource_map(mn, metadata_pid = pid) - response <- publish_update(mn, pid, resmap_pid) + response <- publish_update(mn, pid, resmap_pid, check_first = FALSE) sysmeta <- getSystemMetadata(mn, response$metadata) expect_equal(sysmeta@seriesId, new_sid) @@ -143,8 +145,10 @@ test_that("we can publish an update to an object", { csv <- data.frame(x = 1:50) write.csv(csv, tmp) - upd <- update_object(mn, old, tmp) - file.remove(tmp) + suppressWarnings({ + upd <- update_object(mn, old, tmp) + }) + sm <- dataone::getSystemMetadata(mn, upd) expect_equal(sm@fileName, basename(tmp)) @@ -171,55 +175,205 @@ test_that("we can publish an update to an object and specify our own format id", expect_equal(sm@formatId, "text/plain") }) -test_that("extra statements are maintained between updates", { +test_that("rightsholder is properly set back after publishing an update", { if (!is_token_set(mn)) { skip("No token set. 
Skipping test.") } - pkg <- create_dummy_package(mn, 3) + pkg <- create_dummy_package(mn) + + set_result <- set_rights_holder(mn, unlist(pkg), "CN=arctic-data-admins,DC=dataone,DC=org") + expect_true(all(set_result)) + + new_pkg <- publish_update(mn, pkg$metadata, pkg$resource_map, pkg$data, check_first = FALSE) + rhs <- lapply(unlist(pkg), function(pid) { + dataone::getSystemMetadata(mn, pid)@rightsHolder + }) - # Add some PROV triples to the Resource Map - rm <- tempfile() - writeLines(rawToChar(dataone::getObject(mn, pkg$resource_map)), rm) - # statements <- data.frame(subject = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[1], reserved = TRUE)), - # predicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", - # object = "http://www.w3.org/ns/prov#Entity") - # - # statements <- rbind(statements, - # data.frame(subject = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[2], reserved = TRUE)), - # predicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", - # object = "http://www.w3.org/ns/prov#Entity")) + expect_true(all(unlist(rhs) == "CN=arctic-data-admins,DC=dataone,DC=org")) +}) - statements <- data.frame(subject = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[1], reserved = TRUE)), - predicate = "http://www.w3.org/ns/prov#wasDerivedFrom", - object = paste0("https://cn.dataone.org/cn/v2/resolve/", URLencode(pkg$data[2], reserved = TRUE))) +test_that("publish update returns an error if its arguments are malformed", { + expect_error(publish_update(mn, metadata_pid = 1)) + expect_error(publish_update(mn, metadata_pid = "a", resource_map_pid = "b", data_pids = list(1, 2, 3))) +}) - new_rm <- update_resource_map(mn, pkg$resource_map, pkg$metadata, pkg$data, other_statements = statements, public = TRUE) +test_that("update_object updates the packageId for EML object updates", { + if (!is_token_set(mn)) { + skip("No token set. 
Skipping test.") + } - rm <- tempfile() - writeLines(rawToChar(dataone::getObject(mn, new_rm)), rm) - statements <- parse_resource_map(rm) - expect_true("http://www.w3.org/ns/prov#wasDerivedFrom" %in% statements$predicate) + eml_pid <- create_dummy_metadata(mn) + eml_path <- tempfile(fileext = ".xml") + writeBin(dataone::getObject(mn, eml_pid), eml_path) + new_pid <- update_object(mn, eml_pid, eml_path, format_id = format_eml("2.1")) + updated_eml_path <- tempfile(fileext = ".xml") + writeBin(dataone::getObject(mn, new_pid), updated_eml_path) - new_new_rm <- update_resource_map(mn, new_rm, pkg$metadata, pkg$data, public = TRUE) - rm <- tempfile() - writeLines(rawToChar(dataone::getObject(mn, new_new_rm)), rm) - statements <- parse_resource_map(rm) - expect_true("http://www.w3.org/ns/prov#wasDerivedFrom" %in% statements$predicate) + doc <- xml2::read_xml(updated_eml_path) + expect_equal(new_pid, xml2::xml_attr(xml2::xml_root(doc), "packageId")) }) +test_that("publish_update removes the deprecated eml@access element", { + if (!is_token_set(mn)) { + skip("No token set. 
Skipping test.") + } -test_that("rightsholder is properly set back after publishing an update", { - pkg <- create_dummy_package(mn) + pids <- create_dummy_package(mn) + eml_path <- tempfile(fileext = ".xml") + writeBin(dataone::getObject(mn, pids$metadata), eml_path) - set_result <- set_rights_holder(mn, unlist(pkg), "CN=arctic-data-admins,DC=dataone,DC=org") - expect_true(all(set_result)) + doc <- EML::read_eml(eml_path) + # Populate dummy access element + doc$access <- list(allow = "hello") + write_eml(doc, eml_path) - new_pkg <- publish_update(mn, pkg$metadata, pkg$resource_map, pkg$data) - rhs <- lapply(unlist(pkg), function(pid) { - dataone::getSystemMetadata(mn, pid)@rightsHolder - }) + new_pids <- publish_update(mn, pids$metadata, pids$resource_map, metadata_path = eml_path) + updated_eml_path <- tempfile(fileext = ".xml") + writeBin(dataone::getObject(mn, new_pids$metadata), updated_eml_path) + + new_eml <- EML::read_eml(updated_eml_path) + expect_equal(0, length(new_eml$access$allow)) +}) + +test_that("publishing an object with a valid format ID succeeds", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + tmp_path <- tempfile() + writeLines(LETTERS, tmp_path) + + expect_is(publish_object(mn, tmp_path, "text/plain"), "character") +}) + +test_that("publishing an object with an invalid format ID fails", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + tmp_path <- tempfile() + writeLines(LETTERS, tmp_path) + + expect_error(publish_object(mn, tmp_path, "asdf/asdf")) +}) + +test_that("publish_update removes 'resource_map_pid' from 'parent_child_pids' argument", { + if (!is_token_set(mn)) { + skip("No token set. 
Skipping test.") + } + + parent <- create_dummy_package(mn) + child <- create_dummy_package(mn) + + # Nest packages + parent["resource_map"] <- update_resource_map(mn, + parent$resource_map, + parent$metadata, + parent$data, + child$resource_map, + check_first = F) + + # Updating incorrectly should still run (with the child's own resource_map listed in 'parent_child_pids') + child <- publish_update(mn, + child$metadata, + child$resource_map, + child$data, + parent_resmap_pid = parent$resource_map, + parent_metadata_pid = parent$metadata, + parent_data_pids = parent$data, + parent_child_pids = child$resource_map, check_first = F) + parent <- get_package(mn, child$parent_resource_map) + + expect_equal(child$resource_map, parent$child_packages) +}) + +test_that("publish_update errors if the non-current resource map or metadata pid is provided", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pkg1 <- create_dummy_package(mn) + pkg2 <- publish_update(mn, pkg1$metadata, pkg1$resource_map, pkg1$data) + + expect_error(publish_update(mn, pkg1$metadata, pkg2$resource_map, pkg2$data)) + expect_error(publish_update(mn, pkg2$metadata, pkg1$resource_map, pkg2$data)) +}) + + +test_that("publish_update can replace an EML 2.1.1 record with a 2.2.0 record", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + meta <- publish_object(mn, + path = file.path( + system.file(package = "arcticdatautils"), + "example-eml.xml"), + format_id = format_eml("2.1")) + ore <- create_resource_map(mn, meta) + pkg <- publish_update(mn, meta, ore, format_id = format_eml("2.2")) + sm <- getSystemMetadata(mn, pkg$metadata) + + expect_equal(sm@formatId, format_eml("2.2")) +}) + +test_that("PROV is carried forward if data pids don't change", { + if (!is_token_set(mn)) { + skip("No token set. 
Skipping test.") + } + + # Make a test package and add prov + package <- create_dummy_package(mn, size = 3) + package_prov <- suppressMessages(add_dummy_prov(mn, package$resource_map)) + + # Publish an update on it + update <- publish_update(mn, + package$metadata, + package_prov, + package$data, + check_first = FALSE) + + t <- recover_prov(mn, update$resource_map) + + prov_pids <- gsub("https://cn-stage-2.test.dataone.org/cn/v[0-9]/resolve/|https://cn.dataone.org/cn/v[0-9]/resolve/|https://cn-stage.test.dataone.org/cn/v[0-9]/resolve/", "", c(t$subject, t$object)) %>% + gsub("%3A", ":", .) + prov_pids <- prov_pids[-(grep("^http", prov_pids))] %>% + unique(.) + + expect_equal(sort(prov_pids), sort(update$data)) + +}) + +test_that("PROV is handled with appropriate warnings", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + # Make a test package and add prov + package <- create_dummy_package(mn, size = 3) + package_prov <- suppressMessages(add_dummy_prov(mn, package$resource_map)) + data_new <- create_dummy_object(mn) + # Publish an update on it + expect_warning(update <- publish_update(mn, + package$metadata, + package_prov, + data_new, + keep_prov = TRUE, + check_first = FALSE), "Provenance information is retained") + + # Make a test package and add prov + package <- create_dummy_package(mn, size = 3) + package_prov <- suppressMessages(add_dummy_prov(mn, package$resource_map)) + data_new <- create_dummy_object(mn) + + # Publish an update on it + expect_warning(update <- publish_update(mn, + package$metadata, + package_prov, + data_new, + keep_prov = FALSE, + check_first = FALSE), "Provenance information will be removed") - expect_true(all(unlist(rhs) == "CN=arctic-data-admins,DC=dataone,DC=org")) }) diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 6244149..064f257 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -2,130 +2,384 @@ context("EML") mn <- env_load()$mn -test_that("an EML 
otherEntity subtree can be created when the sysmeta has a filename", { - x <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-sysmeta.xml") - doc <- XML::xmlParse(x) - sysmeta <- new("SystemMetadata") - sysmeta <- datapack::parseSystemMetadata(sysmeta, XML::xmlRoot(doc)) +test_that("a creator can be created", { + creator <- eml_creator("tester", "user") + + expect_equal(creator$individualName$givenName, "tester") + expect_equal(creator$individualName$surName, "user") +}) - other_entity <- sysmeta_to_other_entity(sysmeta)[[1]] +test_that("a contact can be created", { + contact <- eml_contact("test", "user") - # Check some rough properties of the subtree - expect_is(other_entity, "otherEntity") - expect_equal(other_entity@entityName@.Data, "some_file.bin") - expect_equal(other_entity@physical[[1]]@dataFormat@externallyDefinedFormat@formatName, "application/octet-stream") + expect_equal(contact$individualName$givenName, "test") + expect_equal(contact$individualName$surName, "user") }) -test_that("an EML otherEntity subtree can be created when the sysmeta doesn't have a filename ", { - x <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-sysmeta-nofilename.xml") - doc <- XML::xmlParse(x) - sysmeta <- new("SystemMetadata") - sysmeta <- datapack::parseSystemMetadata(sysmeta, XML::xmlRoot(doc)) +test_that("a personnel can be created", { + personnel <- eml_personnel(given_names = "test", sur_name = "user", role = "principalInvestigator", userId = "https://orcid.org/WWWW-XXXX-YYYY-ZZZZ") - other_entity <- sysmeta_to_other_entity(sysmeta)[[1]] + expect_equal(personnel$individualName$givenName, "test") + expect_equal(personnel$individualName$surName, "user") + expect_equal(personnel$role, "principalInvestigator") +}) - # Check some rough properties of the subtree - expect_is(other_entity, "otherEntity") - expect_equal(other_entity@entityName@.Data, "NA") - 
expect_equal(other_entity@physical[[1]]@dataFormat@externallyDefinedFormat@formatName, "application/octet-stream") +test_that("a project can be created", { + test_personnel_1 <- eml_personnel(given_names = "A", sur_name = "User", organization = "NCEAS", role = "originator") + + project <- eml_project("some title", + list(test_personnel_1), + "This is a test abstract", + "I won an award, yay") + + expect_equal(project$title, "some title") + expect_equal(project$personnel[[1]]$individualName$givenName, "A") + expect_equal(project$personnel[[1]]$individualName$surName, "User") + expect_equal(project$personnel[[1]]$organizationName, "NCEAS") + expect_equal(project$personnel[[1]]$role, "originator") + expect_equal(project$funding$para, "I won an award, yay") +}) + +test_that("a project can be created with multiple personnel, an abstract can be created with multiple paragraphs, awards with multiple awards", { + test_personnel_1 <- eml_personnel(given_names = "A", sur_name = "User", organization = "NCEAS", role = "originator") + test_personnel_2 <- eml_personnel(given_names = "Testy", sur_name = "Mactesterson", organization = "A Test Org", role = list("user", "author")) + + project <- eml_project("some title", + list(test_personnel_1, test_personnel_2), + list("This is a test abstract", "This is the second paragraph"), + list("I won an award, yay", "I won a second award, wow")) + + expect_equal(project$title, "some title") + expect_equal(project$personnel[[2]]$individualName$givenName, "Testy") + expect_equal(project$personnel[[2]]$individualName$surName, "Mactesterson") + expect_equal(project$personnel[[2]]$organizationName, "A Test Org") + expect_equal(project$personnel[[2]]$role[[2]], "author") + expect_equal(project$abstract$para[[2]], "This is the second paragraph") + expect_equal(project$funding$para[[2]], "I won a second award, wow") }) -test_that("a methods step can be added to an EML document", { - library(XML) - library(EML) +test_that("a dataTable and 
otherEntity can be added from a pid", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + data_path <- tempfile() + writeLines(LETTERS, data_path) + pid1 <- publish_object(mn, data_path, "text/csv") + pid2 <- publish_object(mn, data_path, "text/csv") + + eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") + + doc <- EML::read_eml(eml_path) + + dummy_factors <- c("factor 1", "factor 2") + dummy_attributes <- create_dummy_attributes_dataframe(10, dummy_factors) + dummy_enumeratedDomain <- create_dummy_enumeratedDomain_dataframe(dummy_factors) + + dummy_attributeList <- EML::set_attributes(dummy_attributes, factors = dummy_enumeratedDomain) + dummy_entityName <- "Test_Name" + dummy_entityDescription <- "Test_Description" + + # Create an otherEntity + OE <- pid_to_eml_entity(mn, pid1, + entityName = dummy_entityName, + entityDescription = dummy_entityDescription, + attributeList = dummy_attributeList) + + expect_true(OE$entityName == dummy_entityName) + expect_true(OE$entityDescription == dummy_entityDescription) - doc <- new("eml") - doc <- add_methods_step(doc, "title", "description") + # Create a dataTable + DT <- pid_to_eml_entity(mn, pid2, + entity_type = "dataTable", + entityName = dummy_entityName, + entityDescription = dummy_entityDescription, + attributeList = dummy_attributeList) - expect_equal(XML::xmlValue(doc@dataset@methods@methodStep[[1]]@description@section[[1]]@.Data[[1]]), "title") - expect_equal(XML::xmlValue(doc@dataset@methods@methodStep[[1]]@description@section[[1]]@.Data[[2]]), "description") + expect_true(DT$entityName == dummy_entityName) + expect_true(DT$entityDescription == dummy_entityDescription) + + doc$dataset$otherEntity <- OE + expect_true(EML::eml_validate(doc)) + + doc$dataset$dataTable <- DT + expect_true(EML::eml_validate(doc)) + + unlink(data_path) }) -test_that("multiple method steps can be added to an EML document", { - library(XML) - library(EML) 
+test_that("eml_otherEntity_to_dataTable fails gracefully", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + + # incorrect inputs + expect_error(eml_otherEntity_to_dataTable("dummy input")) + expect_error(eml_otherEntity_to_dataTable(doc, "1")) - doc <- new("eml") - doc <- add_methods_step(doc, "title", "description") - doc <- add_methods_step(doc, "another", "method") + # subscripts out of bounds + expect_error(eml_otherEntity_to_dataTable(doc, 2)) + + # Duplicate entityNames found + doc$dataset$otherEntity <- list(doc$dataset$otherEntity, doc$dataset$otherEntity) + expect_error(eml_otherEntity_to_dataTable(doc, 1)) - expect_length(doc@dataset@methods@methodStep, 2) }) -test_that("methods can be cleared from an EML document", { - library(EML) +test_that("eml_otherEntity_to_dataTable works", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + doc <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + doc$dataset$otherEntity$attributeList <- EML::set_attributes(create_dummy_attributes_dataframe(1)) + otherEntity <- doc$dataset$otherEntity - doc <- new("eml") - doc <- add_methods_step(doc, "title", "description") + doc <- eml_otherEntity_to_dataTable(doc, 1) - expect_length(doc@dataset@methods@methodStep, 1) + # test that otherEntity was removed + expect_length(doc$dataset$otherEntity, 0) - doc <- clear_methods(doc) - expect_length(doc@dataset@methods@methodStep, 0) + # test that dataTable was added + expect_equal(otherEntity$entityName, doc$dataset$dataTable$entityName) + expect_equivalent(otherEntity$physical, doc$dataset$dataTable$physical) }) -test_that("a creator can be created", { - creator <- eml_creator("test", "user") +test_that("which_in_eml returns correct locations", { + if (!is_token_set(mn)) { + skip("No token set. 
Skipping test.") + } - expect_is(creator, "creator") - expect_equal(creator@individualName[[1]]@givenName[[1]]@.Data, "test") - expect_equal(creator@individualName[[1]]@surName@.Data, "user") + attributes <- + data.frame( + attributeName = c( + "length_1", + "time_2", + "length_3"), + attributeDefinition = c( + "def 1", + "def 2", + "def 3"), + formatString = c( + NA, + NA, + NA), + measurementScale = c( + "ratio", + "ratio", + "ratio"), + domain = c( + "numericDomain", + "numericDomain", + "numericDomain"), + definition = c( + NA, + NA, + NA), + unit = c( + "meter", + "second", + "meter"), + numberType = c( + "real", + "real", + "real"), + stringsAsFactors = FALSE + ) + + attributeList <- EML::set_attributes(attributes) + + dataTable_1 <- list( + entityName = "2016_data.csv", + entityDescription = "2016 data", + attributeList = attributeList) + + dataTable_2 <- dataTable_1 + + dataTable_3 <- list( + entityName = "2015_data.csv", + entityDescription = "2016 data", + attributeList = attributeList) + + creator_1 <- list( + individualName = list(individualName = list( + surName = "LAST", + givenName = "FIRST"))) + creator_2 <- list( + individualName = list(individualName = list( + surName = "LAST", + givenName = "FIRST_2"))) + creator_3 <- creator_2 + + title <- "Title" + + dataset <- list(dataset = list( + title = title, + creator = list(creator_1, creator_2, creator_3), + dataTable = list(dataTable_1, dataTable_2, dataTable_3))) + + doc <- dataset + + expect_equal(c(2,3), which_in_eml(doc$dataset$creator, "givenName", "FIRST_2")) + expect_error(which_in_eml(doc$dataset$dataTable, "attributeName", "length_3")) # not sure why this should fail? 
+ expect_equal(c(1,3), which_in_eml(doc$dataset$dataTable[[1]]$attribute, "attributeName", function(x) {grepl("^length", x)})) }) -test_that("a contact can be created", { - contact <- eml_contact("test", "user") +test_that('eml_set_reference sets a reference', { + eml_path <- file.path(system.file(package = "arcticdatautils"), "example-eml.xml") + doc <- EML::read_eml(eml_path) - expect_is(contact, "contact") - expect_equal(contact@individualName[[1]]@givenName[[1]]@.Data, "test") - expect_equal(contact@individualName[[1]]@surName@.Data, "user") + expect_error(eml_set_reference(doc$dataset$creator, doc$dataset$contact)) + + # Add id to use references + doc$dataset$creator$id <- 'creator_id' + doc$dataset$contact <- eml_set_reference(doc$dataset$creator, doc$dataset$contact) + + expect_equal(doc$dataset$creator$id, doc$dataset$contact$references) + expect_true(EML::eml_validate(doc)) }) +test_that('eml_set_shared_attributes creates shared attribute references', { + eml_path <- file.path(system.file(package = 'arcticdatautils'), 'example-eml.xml') + doc <- EML::read_eml(eml_path) + + attributes <- data.frame(attributeName = 'length_1', attributeDefinition = 'def1', + formatString = NA, measurementScale = 'ratio', domain = 'numericDomain', + definition = NA, unit = 'meter', numberType = 'real', + stringsAsFactors = FALSE) + attributeList <- EML::set_attributes(attributes) -test_that("a project can be created", { - project <- eml_project("some title", "12345", "a", "user") + dataTable_1 <- list(entityName = '2016_data.csv', + entityDescription = '2016 data', + attributeList = attributeList) + dataTable_2 <- dataTable_1 + doc$dataset$dataTable <- list(dataTable_1, dataTable_2) + + doc <- eml_set_shared_attributes(doc) - expect_is(project, "project") - expect_equal(project@title[[1]]@.Data, "some title") - expect_equal(project@personnel[[1]]@individualName[[1]]@givenName[[1]]@.Data, "a") - expect_equal(project@personnel[[1]]@individualName[[1]]@surName@.Data, "user") - 
expect_equal(project@funding@para[[1]]@.Data[[1]], "12345") + expect_equal(doc$dataset$dataTable[[1]]$attributeList$id, doc$dataset$dataTable[[2]]$attributeList$references) + expect_true(EML::eml_validate(doc)) }) +test_that('eml_party creates multiple givenName, organizationName, and positionName fields', { + creator <- eml_party('creator', c('John', 'and Jack'), 'Smith', c('NCEAS', 'UCSB'), + c('Programmers', 'brothers')) -test_that("a project can be created with multiple awards", { - project <- eml_project("some title", c("12345", "54321"), "a", "user") + expect_equal(EML::eml_get(creator, 'givenName'), EML::as_emld(list('John', 'and Jack'))) + expect_equal(EML::eml_get(creator, 'organizationName'), EML::as_emld(list('NCEAS', 'UCSB'))) + expect_equal(EML::eml_get(creator, 'positionName'), EML::as_emld(list('Programmers', 'brothers'))) +}) - expect_length(project@funding@para, 2) - expect_equal(project@funding@para[[1]]@.Data[[1]], "12345") - expect_equal(project@funding@para[[2]]@.Data[[1]], "54321") +test_that('reorder_pids orders pids correctly', { + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + oe1 <- list(entityName = "object one", entityType = "other") + oe2 <- list(entityName = "object two", entityType = "other") + doc <- list(packageId = "an id", system = "a system", + dataset = list( + title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me, + otherEntity = list(oe1, oe2))) + + pid_list <- list("object two" = "some identifier2", "object one" = "some identifier1") + + ordered_pids <- reorder_pids(pid_list, doc) + entity_names <- eml_get_simple(doc, "entityName") + expect_equal(names(ordered_pids), entity_names) }) -test_that("a project can be created with multiple organizations", { - project <- eml_project("some title", "12345", "a", "user", organizations = c("org1", "org2")) +test_that('reorder_pids fails gracefully', { + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + oe1 <- 
list(entityName = "object one", entityType = "other") + doc <- list(packageId = "an id", system = "a system", + dataset = list( + title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me, + otherEntity = list(oe1))) + + pid_list <- list("object two" = "some identifier2", "object one" = "some identifier1") - expect_length(project@personnel[[1]]@organizationName, 2) - expect_equal(project@personnel[[1]]@organizationName[[1]]@.Data, "org1") - expect_equal(project@personnel[[1]]@organizationName[[2]]@.Data, "org2") + expect_error(reorder_pids(pid_list, doc)) }) -test_that("an other entity can be added from a pid", { - if (!is_token_set(mn)) { - skip("No token set. Skipping test.") - } +test_that('eml_nsf_to_project generates a valid project section', { - data_path <- tempfile() - writeLines(LETTERS, data_path) - pid <- publish_object(mn, data_path, "text/plain") + # for a single award, EML 2.1.1 + awards <- "1203146" + proj <- eml_nsf_to_project(awards) - eml_path <- file.path(system.file("inst", package = "arcticdatautils"), "example-eml.xml") + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) - doc <- EML::read_eml(eml_path) - doc@dataset@otherEntity <- new("ListOfotherEntity", list()) + doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me)) - set_other_entities(mn, eml_path, pid) + doc$dataset$project <- proj - doc <- EML::read_eml(eml_path) - testthat::expect_length(doc@dataset@otherEntity, 1) + expect_true(eml_validate(doc)) + + # for multiple awards, EML 2.1.1 + awards <- c("1203146", "1203473", "1603116") + + proj <- eml_nsf_to_project(awards) + + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + + doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me)) + + doc$dataset$project <- proj + + expect_true(eml_validate(doc)) + + # for 
multiple awards, EML 2.2.0 + awards <- c("1203146", "1203473", "1603116") + + emld::eml_version("eml-2.2.0") + proj <- eml_nsf_to_project(awards, eml_version = "2.2") + + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + + doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me)) + + doc$dataset$project <- proj + + expect_true(eml_validate(doc)) - unlink(data_path) }) + +test_that('eml_nsf_to_project handles bad funding numbers gracefully', { + + awards <- c("abcdef", "1203473", "12345") + + expect_warning(proj <- eml_nsf_to_project(awards), "this award will not be included in the project section") + + me <- list(individualName = list(givenName = "Jeanette", surName = "Clark")) + + doc <- list(packageId = "id", system = "system", + dataset = list(title = "A Mimimal Valid EML Dataset", + creator = me, + contact = me)) + + doc$dataset$project <- proj + + expect_true(eml_validate(doc)) +}) + +test_that('eml_nsf_to_project fails gracefully', { + + awards <- c("abcdef", "12345") + expect_error(suppressWarnings(proj <- eml_nsf_to_project(awards)), "No valid award numbers were found") + +}) + + diff --git a/tests/testthat/test_environment.R b/tests/testthat/test_environment.R index 91a1529..ba42e7a 100644 --- a/tests/testthat/test_environment.R +++ b/tests/testthat/test_environment.R @@ -1,24 +1,18 @@ -#' test_environment.R -#' -#' Test functions related to loading handling application environment. 
- -context("environment") +context("Environment") test_that("can load a simple environment file", { - x <- yaml::yaml.load_file(file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "test_environment.yml")) + x <- yaml::yaml.load_file(system.file("./environment.yml", package = "arcticdatautils")) expect_true(length(x) == 3) expect_true(length(setdiff(c("development", "test", "production"), names(x))) == 0) }) - test_that("an environment string can be returned", { expect_is(env_get(), "character") expect_true(nchar(env_get()) > 0) }) test_that("can correctly load the environment", { - # Defaults to development if the env var isn't found Sys.setenv("ARCTICDATA_ENV" = "") expect_true(env_get() == "development") diff --git a/tests/testthat/test_formats.R b/tests/testthat/test_formats.R new file mode 100644 index 0000000..3cbb06c --- /dev/null +++ b/tests/testthat/test_formats.R @@ -0,0 +1,12 @@ +context("Formats") + +test_that("valid formats are valid and invalid ones are not", { + expect_true(check_format("text/csv")) + expect_error(check_format("badformat")) +}) + +test_that("a format can be returned", { + fmt <- format_eml("2.2") + expect_is(fmt, "character") + expect_gt(length(fmt), 0) +}) diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index cc2f5d8..929872c 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -1,6 +1,4 @@ -#' test_helpers.R - -context("helpers") +context("Helpers") mn <- env_load()$mn @@ -17,3 +15,40 @@ test_that("a dummy package can be created", { expect_true(object_exists(mn, result$data)) expect_true(object_exists(mn, result$resource_map)) }) + +test_that("create_dummy_package_full errors if wrong input", { + if (!is_token_set(mn)) { + skip("No token set. 
Skipping test.") + } + + expect_error(create_dummy_package_full(mn, title = 11)) + expect_error(create_dummy_package_full("mn")) +}) + +test_that("list_submissions returns correct output", { + cn <- dataone::CNode('PROD') + adc <- dataone::getMNode(cn, 'urn:node:ARCTIC') + if (!is_token_set(adc)) { + skip("No token set. Skipping test.") + } + + out <- list_submissions(adc, '2018-10-01', '2018-10-03') + expect_equal(out$submitter_name[1], 'Rachel Obbard') +}) + +test_that("A failed submission can be recovered", { + #test runs without a token + + cn <- dataone::CNode('PROD') + adc <- dataone::getMNode(cn, 'urn:node:ARCTIC') + pids <- dataone::query(adc, list(q="fileName:(*eml_draft* AND -*Mullen*)", + fl = "id", + rows="50")) + + path <- tempfile(fileext = ".xml") + + recover_failed_submission(adc, pids[[1]]$id[1], path) + + doc <- EML::read_eml(path) + expect_true(EML::eml_validate(doc)) +}) diff --git a/tests/testthat/test_inventory.R b/tests/testthat/test_inventory.R index 8022be8..cac897f 100644 --- a/tests/testthat/test_inventory.R +++ b/tests/testthat/test_inventory.R @@ -1,6 +1,4 @@ -#' test_inventory.R - -context("inventory") +context("Inventory") test_that("an inventory can be created correctly", { x <- inv_init() @@ -8,84 +6,9 @@ test_that("an inventory can be created correctly", { expect_true(nrow(x) == 0) }) -test_that("an inventory can be populated with files", { - # Case 1: Empty inv, non-empty file - inv <- inv_init() - result <- inv_load_files(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "files_simple.txt")) - expect_true(nrow(result) == 3) - - # Case 2: Non-empty inv, non-empty file, where some are the same - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - result <- inv_load_files(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "files_simple.txt")) - 
expect_true(nrow(result) == 5) - - # Case 3: Non-empty inv, non-empty file, where none are the same - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv <- subset(inv, file != "C") - result <- inv_load_files(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "files_simple.txt")) - expect_true(nrow(result) == 5) - - # Case 4: Non-empty inv, non-empty file, inventory already has columns - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv$bytes <- rep(1000, nrow(inv)) - result <- inv_load_files(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "files_simple.txt")) - - expect_true(nrow(result) == 5) - expect_true(ncol(result) == 2) - expect_true(length(table(result$bytes, exclude=NULL)) == 2) -}) - -test_that("an inventory can be populated with byte sizes", { - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - result <- inv_load_sizes(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "sizes_simple.txt")) - - expect_true(nrow(result) == 3) - expect_true(ncol(result) == 2) -}) - -test_that("an inventory can be populated with checksums", { - # Test if we can populate with checksums before sizes - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv_with_checksums <- inv_load_checksums(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "checksums_simple.txt")) - inv_with_both <- inv_load_sizes(inv_with_checksums, - file.path(system.file("tests", "testfiles", "inventory", package 
= "arcticdatautils"), "sizes_simple.txt")) - - expect_true(nrow(inv_with_both) == 3) - expect_true(ncol(inv_with_both) == 3) - - # Test if we can populate with sizes before checksums - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv_with_sizes <- inv_load_sizes(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "sizes_simple.txt")) - inv_with_both <- inv_load_checksums(inv_with_sizes, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "checksums_simple.txt")) - - expect_true(nrow(inv_with_both) == 3) - expect_true(ncol(inv_with_both) == 3) -}) - -# We should be able to call the same function multiple times and not break things -test_that("calling things repeatedly does not break things", { - inv <- read.csv(file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "inventory_simple.csv"), stringsAsFactors = FALSE) - inv <- inv_load_checksums(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "checksums_simple.txt")) - inv$test <- NA - inv <- inv_load_checksums(inv, - file.path(system.file("tests", "testfiles", "inventory", package = "arcticdatautils"), "checksums_simple.txt")) - - expect_true(ncol(inv) == 3) - expect_true(nrow(inv) == 3) -}) - test_that("an inventory can be updated with new information", { - test_inv <- data.frame(file = "A", pid="", created = FALSE, stringsAsFactors = FALSE) - new_inv <- data.frame(file = "A", pid="pidA", created = TRUE, stringsAsFactors = FALSE) + test_inv <- data.frame(file = "A", pid = "", created = FALSE, stringsAsFactors = FALSE) + new_inv <- data.frame(file = "A", pid = "pidA", created = TRUE, stringsAsFactors = FALSE) result <- inv_update(test_inv, new_inv) expect_true(result[1,"pid"] == "pidA") diff --git a/tests/testthat/test_packaging.R 
b/tests/testthat/test_packaging.R index 94a9ef3..b65a06b 100644 --- a/tests/testthat/test_packaging.R +++ b/tests/testthat/test_packaging.R @@ -1,6 +1,4 @@ -#' test_packaging.R - -context("packaging") +context("Packaging") test_that("child pids are correctly determined", { inventory <- data.frame(pid = c("A", "B", "C"), @@ -13,13 +11,3 @@ test_that("child pids are correctly determined", { expect_equal(determine_child_pids(inventory, "B"), "resource_map_C") expect_equal(determine_child_pids(inventory, "C"), NULL) }) - - -test_that("extra triple can be added to a resource map", { - path <- generate_resource_map("metadata", "data", - other_statements = data.frame(subject="http://example.com/me", - predicate="http://example.com/is_related_to", - object="http://example.com/myself")) - statements <- parse_resource_map(path) - expect_true("http://example.com/me" %in% statements$subject) -}) diff --git a/tests/testthat/test_sysmeta.R b/tests/testthat/test_sysmeta.R index 3fbcd08..ed29476 100644 --- a/tests/testthat/test_sysmeta.R +++ b/tests/testthat/test_sysmeta.R @@ -1,4 +1,4 @@ -context("sysmeta") +context("System metadata") test_that("the replication policy gets cleared", { library(datapack) @@ -9,3 +9,38 @@ test_that("the replication policy gets cleared", { sysmeta <- clear_replication_policy(sysmeta) expect_false(sysmeta@replicationAllowed) }) + +test_that("the replication policy gets defaulted correctly", { + # this is just a regression test + + library(datapack) + + sysmeta <- new("SystemMetadata") + sysmeta <- clear_replication_policy(sysmeta) + + expect_false(sysmeta@replicationAllowed) + expect_equal(sysmeta@numberReplicas, 0) + expect_identical(sysmeta@blockedNodes, list("urn:node:KNB", "urn:node:mnUCSB1")) +}) + +test_that("all system metadata is retrieved", { + cn_staging <- tryCatch(CNode("STAGING"), error = function(e) CNode("STAGING")) + adc_test <- getMNode(cn_staging, "urn:node:mnTestARCTIC") + + rm_pid <- 
"resource_map_urn:uuid:3e3bb5de-ec63-4f13-a549-813f0cf28610" + + expect_error(get_all_sysmeta(7, "")) + expect_error(get_all_sysmeta(adc_test, "")) + expect_error(get_all_sysmeta(adc_test, "urn:uuid:3e3bb5de-ec63-4f13-a549-813f0cf28610")) + expect_error(get_all_sysmeta(adc_test, rm_pid, nmax = -7)) + expect_error(get_all_sysmeta(adc_test, rm_pid, child_packages = 7)) + + all <- get_all_sysmeta(adc_test, rm_pid) + + expect_message(get_all_sysmeta(adc_test, rm_pid)) + expect_type(all, "list") + expect_length(all, 5) + expect_equal(names(all)[1], "dummy_resource_map.xml") + + expect_message(get_all_sysmeta(adc_test, "resource_map_urn:uuid:924f81f6-2e68-4eb8-925f-53f5b66318ec")) +}) diff --git a/tests/testthat/test_util.R b/tests/testthat/test_util.R index 2560c77..5f0fd2a 100644 --- a/tests/testthat/test_util.R +++ b/tests/testthat/test_util.R @@ -1,16 +1,4 @@ -#' test_util.R - -context("util") - -test_that("identifiers can be extracted", { - x <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-field-projects-file.xml") - identifier <- extract_local_identifier("field-projects", x) - expect_equal(identifier, "215.001") - - x <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-gateway-file.xml") - identifier <- extract_local_identifier("gateway", x) - expect_equal(identifier, "urn:x-wmo:md:org.aoncadis.www::d9330d2b-4174-11e3-8af4-00c0f03d5b7c") -}) +context("Utilities") test_that("paths can be joined", { expect_equal(path_join(""), "") @@ -28,41 +16,19 @@ test_that("paths can be joined", { expect_equal(path_join("~/src/arcticdata./inst/asdf"), "~/src/arcticdata/inst/asdf") }) -test_that("a string can be added to a file", { - # Prepare a temp file with an example EML doc - eml_file <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-eml.xml") - tmp <- tempfile() - file.copy(eml_file, tmp) - - # Get original title - doc_pre <- XML::xmlParseDoc(tmp) - title_pre <- 
XML::xmlValue(XML::getNodeSet(doc_pre, "//dataset/title")[[1]]) - - # Add the string - add_string_to_title(tmp, " a test") - - # Get the updated title - doc_post <- XML::xmlParseDoc(tmp) - title_post <- XML::xmlValue(XML::getNodeSet(doc_post, "//dataset/title")[[1]]) - - expect_equal(paste0(title_pre, " a test"), title_post) - - # Clean up - file.remove(tmp) -}) - -test_that("a package id can be changed", { - library(EML) - - eml_file <- file.path(system.file("tests", "testfiles", package = "arcticdatautils"), "example-eml.xml") - tmp <- tempfile() - file.copy(eml_file, tmp) - - replace_package_id(tmp, "new_package_id") - - doc <- read_eml(tmp) - expect_equal(as.character(doc@packageId), "new_package_id") - - # Clean up - file.remove(tmp) +test_that('we can set public READ on all versions of a data package', { + cn <- dataone::CNode('STAGING') + mn <- dataone::getMNode(cn,'urn:node:mnTestARCTIC') + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + pkg <- create_dummy_package(mn) + remove_public_read(mn, unlist(pkg)) + pkg_v2 <- publish_update(mn, pkg$metadata, pkg$resource_map, pkg$data, public = FALSE) + # Set public read on all versions + set_public_read_all_versions(mn, pkg$resource_map) + pids <- c(unlist(pkg), unlist(pkg_v2)) + + expect_true(all(is_public_read(mn, pids))) }) diff --git a/vignettes/a-overview.Rmd b/vignettes/a-overview.Rmd new file mode 100644 index 0000000..232c585 --- /dev/null +++ b/vignettes/a-overview.Rmd @@ -0,0 +1,23 @@ +--- +title: "Overview" +author: "Bryce Mecum" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Overview} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +## Abbreviated API overview + +- `publish_update()`: + - Mint a DOI for a package + - Replace the metadata for a package + - Add/remove data in a package +- `publish_object()`: Use before `publish_update()` if you're adding new data to a package +- `update_resource_map()`: Edit 
the set of child packages for a package +- `create_resource_map()`: Useful for creating a new package from scratch. For both project-level metadata packages or dataset-level packages +- `set_rights_and_access()`: Use this to give a user edit rights to a package + +The package does way more than this but the above are the most common tasks. diff --git a/vignettes/basic-usage.Rmd b/vignettes/b-basic-usage.Rmd similarity index 100% rename from vignettes/basic-usage.Rmd rename to vignettes/b-basic-usage.Rmd diff --git a/vignettes/overview.Rmd b/vignettes/overview.Rmd deleted file mode 100644 index dddc18c..0000000 --- a/vignettes/overview.Rmd +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: "Overview" -author: "Bryce Mecum" -date: "`r Sys.Date()`" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Overview} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -## Abbreviated API overview: - -- `publish_update`: - - Mint a DOI for a package - - Replace the metadata for a package, from a local file - - Add/remove data in a package -- `publish_object`: Use before `publish_update` if you're adding new data to a package. -- `update_resource_map`: Edit the set of child packages for a package -- `create_resource_map`: Useful for creating a new package from scratch. For both project-level metadata packages or dataset-level packages. -- `set_rights_and_access`: Use this to give a user edit rights to a package - -The package does way more than this but the above are the most common tasks.