From bffdbc96eaba17837379fe176e342df1293e6479 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:31:19 +0200 Subject: [PATCH 1/9] Improve test assertions and error handling in API modules Updated test cases in add.rs, remove.rs, and delete.rs to use more robust assertions and error handling. Tests now check response status and provide clearer error messages, improving reliability and debuggability of test failures. --- src/native_api/admin/tools/add.rs | 7 +++++-- src/native_api/admin/tools/remove.rs | 10 ++++++---- src/native_api/dataset/delete.rs | 29 ++++++++++++++-------------- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/native_api/admin/tools/add.rs b/src/native_api/admin/tools/add.rs index 0cdb3d0..0c3984f 100644 --- a/src/native_api/admin/tools/add.rs +++ b/src/native_api/admin/tools/add.rs @@ -223,8 +223,11 @@ mod tests { manifest.tool_name = format!("fabulous{random_suffix}"); // First registration - let response1 = register_external_tool(&client, manifest.clone(), false).await; - assert!(response1.is_ok()); + let response1 = register_external_tool(&client, manifest.clone(), false) + .await + .unwrap(); + + assert!(response1.status.is_ok()); // Second registration without overwrite - should panic register_external_tool(&client, manifest.clone(), false) diff --git a/src/native_api/admin/tools/remove.rs b/src/native_api/admin/tools/remove.rs index 2926501..ad81684 100644 --- a/src/native_api/admin/tools/remove.rs +++ b/src/native_api/admin/tools/remove.rs @@ -80,11 +80,13 @@ mod tests { let added = register_external_tool(&client, manifest.clone(), false) .await - .expect("Could not register external tool") - .data - .unwrap(); + .expect("Could not register external tool"); - let tool_id = added.id; + if added.status.is_err() { + panic!("Could not register external tool: {:#?}", added); + } + + let tool_id = added.data.unwrap().id; let response = remove_external_tool(&client, tool_id) .await .expect("Could not remove external tool: Request failed"); diff --git a/src/native_api/dataset/delete.rs b/src/native_api/dataset/delete.rs index 6f78252..c64b008 100644 --- a/src/native_api/dataset/delete.rs +++ b/src/native_api/dataset/delete.rs @@ -1,15 +1,12 @@ use typify::import_types; use crate::{ - client::{BaseClient, evaluate_response}, + client::{evaluate_response, BaseClient}, request::RequestType, response::Response, }; -import_types!( - schema = "models/dataset/delete.json", - struct_builder = true, -); +import_types!(schema = "models/dataset/delete.json", struct_builder = true,); /// Deletes a dataset by its ID. /// @@ -61,7 +58,7 @@ pub async fn delete_dataset( #[cfg(test)] mod tests { - use crate::prelude::{BaseClient, dataset}; + use crate::prelude::{dataset, BaseClient}; use crate::test_utils::{create_test_dataset, extract_test_env}; /// Tests the successful deletion of an existing dataset. 
@@ -83,18 +80,22 @@ mod tests { async fn test_delete_dataset() { // Set up the client let (api_token, base_url, _) = extract_test_env(); - let client = BaseClient::new(&base_url, Some(&api_token)) - .expect("Failed to create client"); + let client = BaseClient::new(&base_url, Some(&api_token)).expect("Failed to create client"); // Create a dataset let (id, _) = create_test_dataset(&client, "Root").await; // Delete the dataset let response = dataset::delete::delete_dataset(&client, &id) - .await.expect("Failed to delete dataset"); + .await + .expect("Failed to delete dataset"); // Assert the request was successful - assert!(response.status.is_ok()); + assert!( + response.status.is_ok(), + "Failed to delete dataset: {:?}", + response + ); } /// Tests the deletion of a non-existent dataset. @@ -117,14 +118,14 @@ mod tests { async fn test_delete_dataset_not_found() { // Set up the client let (api_token, base_url, _) = extract_test_env(); - let client = BaseClient::new(&base_url, Some(&api_token)) - .expect("Failed to create client"); + let client = BaseClient::new(&base_url, Some(&api_token)).expect("Failed to create client"); // Attempt to delete a non-existent dataset let response = dataset::delete::delete_dataset(&client, &-1) - .await.expect("Failed to delete dataset"); + .await + .expect("Failed to delete dataset"); // Assert the request was successful assert!(response.status.is_err()); } -} \ No newline at end of file +} From 09bf05106b3864d23b5cd5af8c63c21c85126053 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:31:27 +0200 Subject: [PATCH 2/9] Reformat external_tool_test.json indentation Updated the indentation in tests/fixtures/external_tool_test.json for improved readability and consistency. No changes to the actual data structure or content. 
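A quick way to confirm that a pure re-indentation like this leaves the data untouched, sketched here under the assumption that jq is available (the test runner added later in this series already depends on it) and that the pre-change file was saved aside as external_tool_test.json.orig (a hypothetical copy, not part of this series):

```bash
# Sort keys and normalize whitespace on both versions, then compare.
# No diff output means the reformat changed presentation only.
jq -S . tests/fixtures/external_tool_test.json.orig > /tmp/before.json
jq -S . tests/fixtures/external_tool_test.json > /tmp/after.json
diff /tmp/before.json /tmp/after.json
```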
--- tests/fixtures/external_tool_test.json | 60 +++++++++++++------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/tests/fixtures/external_tool_test.json b/tests/fixtures/external_tool_test.json index bf7aaeb..eeb7b81 100644 --- a/tests/fixtures/external_tool_test.json +++ b/tests/fixtures/external_tool_test.json @@ -1,34 +1,34 @@ { - "displayName": "Fabulous File Tool", - "description": "A non-existent tool that is fabulous fun for files!", - "toolName": "fabulous", - "scope": "file", - "types": [ - "explore", - "preview" - ], - "toolUrl": "https://fabulousfiletool.com", - "contentType": "text/tab-separated-values", - "httpMethod":"GET", - "toolParameters": { - "queryParameters": [ - { - "fileid": "{fileId}" - }, - { - "datasetPid": "{datasetPid}" - }, - { - "locale":"{localeCode}" - } - ] - }, - "allowedApiCalls": [ + "displayName": "Fabulous File Tool", + "description": "A non-existent tool that is fabulous fun for files!", + "toolName": "fabulous", + "scope": "file", + "types": [ + "explore", + "preview" + ], + "toolUrl": "https://fabulousfiletool.com", + "contentType": "text/tab-separated-values", + "httpMethod":"GET", + "toolParameters": { + "queryParameters": [ { - "name":"retrieveDataFile", - "httpMethod":"GET", - "urlTemplate":"/api/v1/access/datafile/{fileId}", - "timeOut":270 + "fileid": "{fileId}" + }, + { + "datasetPid": "{datasetPid}" + }, + { + "locale":"{localeCode}" } ] - } \ No newline at end of file + }, + "allowedApiCalls": [ + { + "name":"retrieveDataFile", + "httpMethod":"GET", + "urlTemplate":"/api/v1/access/datafile/{fileId}", + "timeOut":270 + } + ] +} \ No newline at end of file From 8a829dc7b793c93c0614c3e7abad0f3bbf6ff1e7 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:31:36 +0200 Subject: [PATCH 3/9] Add script to initialize S3 bucket in LocalStack Introduces init-s3.sh to automate creation of the 'mybucket' S3 bucket for Dataverse storage in LocalStack. --- conf/localstack/init-s3.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 conf/localstack/init-s3.sh diff --git a/conf/localstack/init-s3.sh b/conf/localstack/init-s3.sh new file mode 100755 index 0000000..d2ea8b2 --- /dev/null +++ b/conf/localstack/init-s3.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# Create the mybucket bucket for Dataverse S3 storage +awslocal s3 mb s3://mybucket + +echo "S3 bucket 'mybucket' created successfully" + From 20cbdf1c143b9bcab7607f7f0f31338539eefff1 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:31:47 +0200 Subject: [PATCH 4/9] Add Docker setup and test runner scripts Introduces docker-compose configuration for Dataverse testing, a local environment file for service variables, and a shell script to automate container startup and test execution. This enables streamlined local integration testing with Dataverse and related services. 
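For reference, the runner introduced below accepts an optional test-name filter as its first argument and forwards it to cargo test, so typical invocations look like this (test_delete_dataset is one of the tests touched in patch 1/9):

```bash
# Bring up the full Dataverse stack and run the whole suite
./run-tests.sh

# Re-run a single test against the already-running containers
./run-tests.sh test_delete_dataset
```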
--- docker/docker-compose-base.yml | 254 +++++++++++++++++++++++++++++++++ local-test.env | 13 ++ run-tests.sh | 83 +++++++++++ 3 files changed, 350 insertions(+) create mode 100644 docker/docker-compose-base.yml create mode 100644 local-test.env create mode 100755 run-tests.sh diff --git a/docker/docker-compose-base.yml b/docker/docker-compose-base.yml new file mode 100644 index 0000000..a6cfe5c --- /dev/null +++ b/docker/docker-compose-base.yml @@ -0,0 +1,254 @@ +name: rust-tests + +services: + dataverse: + container_name: 'dataverse' + hostname: dataverse + image: ${DATAVERSE_IMAGE} + restart: on-failure + user: payara + environment: + DATAVERSE_DB_HOST: postgres + DATAVERSE_DB_PASSWORD: secret + DATAVERSE_DB_USER: ${DATAVERSE_DB_USER} + ENABLE_JDWP: '1' + ENABLE_RELOAD: '1' + DATAVERSE_JSF_REFRESH_PERIOD: '1' + DATAVERSE_FEATURE_API_BEARER_AUTH: '1' + DATAVERSE_FEATURE_INDEX_HARVESTED_METADATA_SOURCE: '1' + DATAVERSE_FEATURE_API_BEARER_AUTH_PROVIDE_MISSING_CLAIMS: '1' + DATAVERSE_MAIL_SYSTEM_EMAIL: 'dataverse@localhost' + DATAVERSE_MAIL_MTA_HOST: 'smtp' + DATAVERSE_AUTH_OIDC_ENABLED: '1' + DATAVERSE_AUTH_OIDC_CLIENT_ID: test + DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 + DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test + DATAVERSE_SPI_EXPORTERS_DIRECTORY: '/dv/exporters' + # These two oai settings are here to get HarvestingServerIT to pass + dataverse_oai_server_maxidentifiers: '2' + dataverse_oai_server_maxrecords: '2' + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 + -Ddataverse.files.file1.type=file + -Ddataverse.files.file1.label=Filesystem + -Ddataverse.files.file1.directory=${STORAGE_DIR}/store + -Ddataverse.files.localstack1.type=s3 + -Ddataverse.files.localstack1.label=LocalStack + -Ddataverse.files.localstack1.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack1.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack1.bucket-name=mybucket + -Ddataverse.files.localstack1.path-style-access=true + -Ddataverse.files.localstack1.upload-redirect=true + -Ddataverse.files.localstack1.download-redirect=true + -Ddataverse.files.localstack1.access-key=default + -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.pid.providers=fake + -Ddataverse.pid.default-provider=fake + -Ddataverse.pid.fake.type=FAKE + -Ddataverse.pid.fake.label=FakeDOIProvider + -Ddataverse.pid.fake.authority=10.5072 + -Ddataverse.pid.fake.shoulder=FK2/ + ports: + - '8080:8080' # HTTP (Dataverse Application) + - '4949:4848' # HTTPS (Payara Admin Console) + - '9009:9009' # JDWP + - '8686:8686' # JMX + networks: + - dataverse + depends_on: + postgres: + condition: service_started + solr: + condition: service_started + dv_initializer: + condition: service_completed_successfully + dev_localstack: + condition: service_healthy + volumes: + - ${PWD}/dv/data:/dv + tmpfs: + - /dumps:mode=770,size=2052M,uid=1000,gid=1000 + - /tmp:mode=770,size=2052M,uid=1000,gid=1000 + mem_limit: 2147483648 # 2 GiB + mem_reservation: 1024m + privileged: false + healthcheck: + test: curl --fail http://dataverse:8080/api/info/version || exit 1 + interval: 10s + retries: 20 + start_period: 20s + timeout: 240s + + dv_initializer: + container_name: 'dv_initializer' + image: ${CONFIGBAKER_IMAGE} + restart: 'no' + command: + - sh + - -c + - 'fix-fs-perms.sh dv' + volumes: + - ${PWD}/dv/data:/dv + + postgres: + container_name: 'postgres' + hostname: postgres + image: postgres:${POSTGRES_VERSION} + restart: on-failure + environment: + - 
POSTGRES_USER=${DATAVERSE_DB_USER} + - POSTGRES_PASSWORD=${DATAVERSE_DB_PASSWORD} + ports: + - '5432:5432' + networks: + - dataverse + + solr_initializer: + container_name: 'solr_initializer' + image: ${CONFIGBAKER_IMAGE} + restart: 'no' + command: + - sh + - -c + - 'fix-fs-perms.sh solr && cp -a /template/* /solr-template' + volumes: + - ${PWD}/solr/data:/var/solr + - ${PWD}/solr/conf:/solr-template + + solr: + container_name: 'solr' + hostname: 'solr' + image: solr:${SOLR_VERSION} + depends_on: + solr_initializer: + condition: service_completed_successfully + restart: on-failure + ports: + - '8983:8983' + networks: + - dataverse + command: + - 'solr-precreate' + - 'collection1' + - '/template' + volumes: + - ${PWD}/solr/data:/var/solr + - ${PWD}/solr/conf:/template + + smtp: + container_name: 'smtp' + hostname: 'smtp' + image: maildev/maildev:2.0.5 + restart: on-failure + ports: + - '25:25' # smtp server + - '1080:1080' # web ui + environment: + - MAILDEV_SMTP_PORT=25 + - MAILDEV_MAIL_DIRECTORY=/mail + networks: + - dataverse + tmpfs: + - /mail:mode=770,size=128M,uid=1000,gid=1000 + + keycloak: + container_name: 'keycloak' + image: 'quay.io/keycloak/keycloak:26.3.2' + hostname: keycloak + environment: + - KEYCLOAK_ADMIN=kcadmin + - KEYCLOAK_ADMIN_PASSWORD=kcpassword + - KEYCLOAK_LOGLEVEL=DEBUG + - KC_HOSTNAME_STRICT=false + networks: + dataverse: + aliases: + - keycloak.mydomain.com + command: start-dev --import-realm --http-port=8090 + ports: + - '8090:8090' + volumes: + - ${PWD}/conf/keycloak/test-realm.json:/opt/keycloak/data/import/test-realm.json + + proxy: + container_name: 'proxy' + image: caddy:2-alpine + command: ['caddy', 'run', '-c', '/Caddyfile'] + ports: + - '4848:4848' # Will expose Payara Admin Console (HTTPS) as HTTP + restart: always + volumes: + - ${PWD}/conf/proxy/Caddyfile:/Caddyfile:ro + depends_on: + - dataverse + networks: + - dataverse + + dev_localstack: + container_name: 'dev_localstack' + hostname: 'localstack' + image: localstack/localstack:4.2.0 + restart: on-failure + ports: + - '127.0.0.1:4566:4566' + environment: + - DEBUG=${DEBUG-} + - DOCKER_HOST=unix:///var/run/docker.sock + - HOSTNAME_EXTERNAL=localstack + networks: + - dataverse + volumes: + - ${PWD}/conf/localstack:/etc/localstack/init/ready.d + tmpfs: + - /localstack:mode=770,size=128M,uid=1000,gid=1000 + + previewers_provider: + container_name: 'previewers_provider' + hostname: previewers-provider + image: trivadis/dataverse-previewers-provider:latest + ports: + - '9080:9080' + networks: + - dataverse + environment: + - NGINX_HTTP_PORT=9080 + - PREVIEWERS_PROVIDER_URL=http://localhost:9080 + - VERSIONS=v1.4,betatest + platform: linux/amd64 + + register_previewers: + container_name: 'register_previewers' + hostname: register-previewers + image: trivadis/dataverse-deploy-previewers:latest + networks: + - dataverse + environment: + - DATAVERSE_URL=http://dataverse:8080 + - TIMEOUT=10m + - PREVIEWERS_PROVIDER_URL=http://localhost:9080 + - EXCLUDE_PREVIEWERS= + - REMOVE_EXISTING=true + command: + - deploy + restart: 'no' + platform: linux/amd64 + + bootstrap: + container_name: 'bootstrap' + hostname: 'bootstrap' + image: ${CONFIGBAKER_IMAGE} + restart: 'no' + networks: + - dataverse + volumes: + - ${PWD}/dv/bootstrap.exposed.env:/.env + command: + - sh + - -c + - 'bootstrap.sh -e /.env dev' + depends_on: + dataverse: + condition: service_healthy + +networks: + dataverse: + driver: bridge diff --git a/local-test.env b/local-test.env new file mode 100644 index 0000000..dee7241 --- /dev/null +++ 
b/local-test.env @@ -0,0 +1,13 @@ +# Dataverse +DATAVERSE_IMAGE=docker.io/gdcc/dataverse:latest +DATAVERSE_DB_USER=dataverse +DATAVERSE_DB_PASSWORD=secret +CONFIGBAKER_IMAGE=docker.io/gdcc/configbaker:unstable + +# Services +POSTGRES_VERSION=17 +SOLR_VERSION=9.8.0 + +# Default Dataverse version (will be dynamically detected in tests) +DV_VERSION=6.7.1 + diff --git a/run-tests.sh b/run-tests.sh new file mode 100755 index 0000000..7056849 --- /dev/null +++ b/run-tests.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# Check if Docker is installed +if ! command -v docker &>/dev/null; then + echo "āœ‹ Docker is not installed. Please install Docker before running this script." + exit 1 +fi + +# Parse command line arguments +TEST_NAME="" +if [ $# -gt 0 ]; then + TEST_NAME="$1" +fi + +# Prepare the environment for the test +mkdir dv >>/dev/null 2>&1 +touch dv/bootstrap.exposed.env >>/dev/null 2>&1 + +printf "\nšŸš€ Preparing containers\n" + +# Start all containers (infrastructure + tests) +printf "\nšŸš€ Starting all containers...\n" +printf " The test container will wait for Dataverse and fetch the version automatically\n\n" + +docker compose \ + -f docker/docker-compose-base.yml \ + --env-file local-test.env \ + up -d + +printf "\nšŸš€ Waiting for Dataverse to be ready...\n" + +# Wait for Dataverse to be ready by checking the version endpoint +max_attempts=60 +attempt=1 +while [ $attempt -le $max_attempts ]; do + printf " Attempt $attempt/$max_attempts: Checking if Dataverse is ready...\n" + + if curl -s -f http://localhost:8080/api/info/version > /dev/null 2>&1; then + printf " āœ… Dataverse is ready!\n" + break + fi + + if [ $attempt -eq $max_attempts ]; then + printf " āŒ Dataverse failed to start after $max_attempts attempts\n" + printf " šŸ“‹ Checking container logs...\n" + docker logs dataverse + exit 1 + fi + + printf " ā³ Dataverse not ready yet, waiting 10 seconds...\n" + sleep 10 + attempt=$((attempt + 1)) +done + +# Fetch the env variables from the container +printf "\nšŸš€ Fetching environment variables from container...\n" + +# Copy the bootstrap.exposed.env file from the container to get the API_TOKEN +docker cp dataverse:/dv/bootstrap.exposed.env dv/bootstrap.exposed.env + +# Source the environment variables +if [ -f dv/bootstrap.exposed.env ]; then + export $(grep "API_TOKEN" "dv/bootstrap.exposed.env") + export API_TOKEN_SUPERUSER=$API_TOKEN + export BASE_URL=http://localhost:8080 + export DV_VERSION=$(curl -s -f http://localhost:8080/api/info/version | jq -r '.data.version') + printf " āœ… Environment variables loaded successfully\n" + printf " šŸ“‹ API_TOKEN: ********\n" + printf " šŸ“‹ BASE_URL: ${BASE_URL}\n" + printf " šŸ“‹ DV_VERSION: ${DV_VERSION}\n" +else + printf " āŒ Failed to fetch bootstrap.exposed.env from container\n" + exit 1 +fi + +# When ready, run cargo test +if [ -n "$TEST_NAME" ]; then + printf "\nšŸš€ Running specific test: $TEST_NAME...\n" + cargo test "$TEST_NAME" +else + printf "\nšŸš€ Running all tests...\n" + cargo test +fi From 1a6cb99dade26c41e0d2c65623baed1f34e78f8d Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:31:52 +0200 Subject: [PATCH 5/9] Revamp Readme with detailed usage and features Expanded the Readme to provide comprehensive documentation, including feature overview, installation instructions, library and CLI usage examples, development and testing workflow, contribution guidelines, and community resources. 
Improves clarity for new users and contributors, and highlights key capabilities and configuration options. --- Readme.md | 243 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 187 insertions(+), 56 deletions(-) diff --git a/Readme.md b/Readme.md index 385db20..ddbe633 100644 --- a/Readme.md +++ b/Readme.md @@ -6,114 +6,245 @@ ![Build Status](https://github.com/JR-1991/rust-dataverse/actions/workflows/tests.yml/badge.svg) -**Dataverse Rust** is a client library and command-line interface (CLI) for interacting with -the [Dataverse API](https://guides.dataverse.org/en/latest/api/). This project is in active development and not yet -feature complete. +A comprehensive Rust library and command-line interface for interacting with the [Dataverse API](https://guides.dataverse.org/en/latest/api/). Build robust data repository workflows with type-safe, asynchronous operations. + +> **Note:** This project is under active development. While core functionality is stable, the API may evolve before the 1.0 release. + +## Why Dataverse Rust? + +- **šŸš€ High Performance** - Built with async/await using Tokio and Reqwest for efficient concurrent operations +- **šŸ”’ Type Safety** - Leverage Rust's type system to catch errors at compile time +- **⚔ Direct Upload** - Parallel batch uploads for fast file transfers to S3-compatible storage +- **šŸŽÆ Dual Interface** - Use as a library in your Rust projects or as a standalone CLI tool +- **šŸ” Secure Authentication** - Multiple auth methods including system keyring integration for credential storage +- **šŸ“¦ Flexible Configuration** - JSON and YAML support for all configuration files ## Features -Current capabilities include: +**Dataverse Rust** provides complete coverage of core Dataverse operations through both a programmatic library interface and a full-featured CLI: + +### šŸ“š Collections + +Create, publish, and manage Dataverse collections with hierarchical organization support. + +### šŸ“Š Datasets -### Collection Management +Full dataset lifecycle management including creation, metadata editing, versioning, publishing, linking, and deletion. Support for dataset locks and review workflows. -- **Create**: Create a new collection within the Dataverse. -- **Delete**: Remove an existing collection. -- **Publish**: Publish a collection to make it publicly available. -- **Contents**: Retrieve the contents of a collection. +### šŸ“ Files -### General Information +Upload files via standard or direct upload (with parallel batch support), replace existing files, download files and complete datasets, and manage file metadata. -- **Version**: Retrieve the current version of the Dataverse instance. +### šŸ” Search -### Dataset Management +Query datasets and files across your Dataverse instance with flexible search parameters. -- **Get**: Fetch details of a specific dataset. -- **Create**: Create a new dataset within a collection. -- **Edit**: Modify an existing dataset. -- **Delete**: Delete an unpublished dataset. -- **Upload**: Upload a file to a dataset. -- **Publish**: Publish a dataset to make it publicly available. -- **Link**: Link datasets to other collections. +### šŸ› ļø Administration -### File Management +Manage storage drivers, configure external tools, and perform administrative operations. -- **Replace**: Replace existing files in a dataset. +### ā„¹ļø Instance Information + +Retrieve version information and available metadata exporters from your Dataverse instance. 
 ## Installation
 
-**Command line**
+### CLI Installation
+
+Install the command-line tool directly from the repository:
 
 ```bash
 cargo install --git https://github.com/JR-1991/rust-dataverse.git
 ```
 
-**Cargo.toml**
+### Library Installation
 
-Please note, this crate is not yet published on crates.io. You can add it to your `Cargo.toml` file by pointing to the
-GitHub repository.
+Add to your `Cargo.toml`:
 
 ```toml
 [dependencies]
 dataverse = { git = "https://github.com/JR-1991/rust-dataverse" }
 ```
 
+> **Note:** Not yet published on crates.io. Pre-1.0 releases will be available soon.
+
 ## Usage
 
-### Command line
+### Library Usage
+
+The library provides an async API built on `tokio` and `reqwest`. Import the prelude for common types:
+
+```rust
+use dataverse::prelude::*;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Initialize client
+    let client = BaseClient::new(
+        "https://demo.dataverse.org",
+        Some("your-api-token")
+    )?;
+
+    // Get instance version
+    let version = info::get_version(&client).await?;
+    println!("Dataverse version: {}", version.data.unwrap());
+
+    // Create a dataset
+    let dataset_body = dataset::create::DatasetCreateBody {
+        // ... configure metadata
+        ..Default::default()
+    };
+    let response = dataset::create_dataset(&client, "root", dataset_body).await?;
+
+    // Upload a file
+    let file = UploadFile::from("path/to/file.csv");
+    let identifier = Identifier::PersistentId("doi:10.5072/FK2/ABCDEF".to_string());
+    dataset::upload_file_to_dataset(&client, identifier, file, None, None).await?;
+
+    Ok(())
+}
+```
-Before you can use the command line tool, you need to set the `DVCLI_URL` and `DVCLI_TOKEN` environment variables. You
-can do this by adding the following lines to your `.bashrc` or `.bash_profile` file:
+**Key Library Modules:**
 
-```bash
-export DVCLI_URL="https://your.dataverse.url"
-export DVCLI_TOKEN="your_token_here"
-```
+- `dataverse::client::BaseClient` - HTTP client for API interactions
+- `dataverse::native_api::collection` - Collection operations
+- `dataverse::native_api::dataset` - Dataset operations
+- `dataverse::native_api::file` - File operations
+- `dataverse::native_api::admin` - Administrative operations
+- `dataverse::search_api` - Search functionality
+- `dataverse::direct_upload` - Direct upload with parallel batch support
+- `dataverse::data_access` - File and dataset downloads
+
+### CLI Usage
+
+The CLI provides three flexible authentication methods:
 
-The command line tool in organized in subcommands. To see a list of available subcommands, run:
+#### 1. Profile-Based (Recommended)
+
+Store credentials securely in your system keyring:
 
 ```bash
-dvcli --help
+# Create a profile
+dvcli auth set --name production --url https://dataverse.org --token your-api-token
+
+# Use the profile
+dvcli --profile production info version
 ```
 
-To see help for a specific subcommand, run:
+#### 2. Environment Variables
+
+Set environment variables for automatic authentication:
 
 ```bash
-dvcli <subcommand> --help
+export DVCLI_URL="https://demo.dataverse.org"
+export DVCLI_TOKEN="your-api-token"
+
+dvcli dataset meta doi:10.5072/FK2/ABC123
 ```
 
-**Example**
+#### 3. Interactive Mode
 
-In this examples we will demonstrate how to retrieve the version of the Dataverse instance.
+If neither profile nor environment variables are set, the CLI will prompt for credentials:
 
 ```bash
 dvcli info version
+# Prompts for URL and token
 ```
 
-The output will be similar to:
+**Common CLI Operations:**
+
+> **Note:** Configuration files can be provided in both JSON and YAML formats.
```bash -Calling: http://localhost:8080/api/info/version -└── šŸŽ‰ Success! - Received the following response: +# Get help +dvcli --help +dvcli dataset --help -{ - "version": "6.2" -} +# Collections +dvcli collection create --parent root --body collection.json +dvcli collection publish my-collection + +# Datasets +dvcli dataset create --collection root --body dataset.json # or dataset.yaml +dvcli dataset upload --id doi:10.5072/FK2/ABC123 data.csv +dvcli dataset publish doi:10.5072/FK2/ABC123 + +# Direct upload (faster for large files) +dvcli dataset direct-upload --id doi:10.5072/FK2/ABC123 --parallel 5 file1.csv file2.csv + +# Files +dvcli file replace --id 12345 --path new-file.csv +dvcli file download file-pid.txt --path ./downloads/ + +# Search +dvcli search -q "climate change" -t dataset -t file + +# Admin +dvcli admin storage-drivers +dvcli admin add-external-tool tool-manifest.json ``` ## Examples -We have provided an example in the `examples` directory. These examples demonstrate how to use the client to perform -various operations. +Complete workflow examples are available in the [`examples/`](examples/) directory: + +- **[create-upload-publish](examples/create-upload-publish)** - End-to-end workflow demonstrating collection and dataset creation, file upload, and publishing using shell scripts and the CLI. + +Besides these examples, you can also find some recipes in the [Dataverse Recipes](https://github.com/gdcc/dataverse-recipes/tree/main/dvcli) repository, which cover most of the functionality of the CLI. + +## Development + +### Running Tests + +Tests require a running Dataverse instance. We provide a convenient test script that handles infrastructure setup: + +```bash +# Run all tests (starts Docker containers automatically) +./run-tests.sh + +# Run a specific test +./run-tests.sh test_create_dataset +``` + +The script automatically: + +- Starts Dataverse with PostgreSQL and Solr via Docker Compose +- Waits for services to be ready +- Configures environment variables +- Executes the test suite + +Docker containers remain running after tests complete for faster subsequent runs. View logs with `docker logs dataverse` if you encounter issues. + +### Manual Test Setup + +For granular control during development: + +```bash +# Start infrastructure +docker compose -f ./docker/docker-compose-base.yml --env-file local-test.env up -d + +# Configure environment +export BASE_URL=http://localhost:8080 +export DV_VERSION=6.2 +export $(grep "API_TOKEN" "dv/bootstrap.exposed.env") +export API_TOKEN_SUPERUSER=$API_TOKEN + +# Run tests +cargo test +cargo test -- --nocapture # with output +cargo test test_name # specific test +cargo test collection:: # module tests +``` + +## Contributing + +Contributions are welcome! Whether you're fixing bugs, adding features, or improving documentation, your help is appreciated. Please feel free to open issues or submit pull requests on [GitHub](https://github.com/JR-1991/rust-dataverse). + +## Community -* [`examples/create-upload-publish`](examples/create-upload-publish) - Demonstrates how to create a collection, dataset, - upload a file, and publish the collection and dataset. +Join the conversation on the [Dataverse Zulip Channel](https://dataverse.zulipchat.com)! Connect with other developers, get help, share ideas, and discuss the future of Rust clients for Dataverse. 
-## ToDo's +## License -- [ ] Implement remaining API endpoints -- [x] Write unit and integration tests -- [x] Asynchronous support using `tokio` -- [x] Documentation -- [ ] Publish on crates.io -- [x] Continuous integration -- [ ] Validate before upload using `/api/dataverses/$ID/validateDatasetJson` +This project is licensed under the MIT License - see the [License.md](License.md) file for details. From 6037bcf1394ca4566d0a39aa9b0a2e22a9af0646 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:32:00 +0200 Subject: [PATCH 6/9] Refactor tool-response schema for structured parameters Updated the tool-response.json schema to use structured objects for toolParameters, allowedApiCalls, and requirements instead of JSON strings. This change improves validation, clarity, and maintainability by explicitly defining the expected properties and types for each field. --- models/admin/tools/tool-response.json | 78 ++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/models/admin/tools/tool-response.json b/models/admin/tools/tool-response.json index 5e72700..ae1a788 100644 --- a/models/admin/tools/tool-response.json +++ b/models/admin/tools/tool-response.json @@ -47,16 +47,82 @@ "description": "Either GET or POST" }, "toolParameters": { - "type": "string", - "description": "JSON string representation of the tool parameters object containing query parameters for substitution" + "type": "object", + "properties": { + "queryParameters": { + "type": "array", + "items": { + "type": "object", + "patternProperties": { + "^[a-zA-Z][a-zA-Z0-9_]*$": { + "type": "string", + "pattern": "^\\{(siteUrl|fileId|filePid|apiToken|datasetId|datasetPid|datasetVersion|localeCode)\\}$" + } + }, + "additionalProperties": false, + "minProperties": 1, + "maxProperties": 1 + }, + "description": "Key/value combinations that can be appended to the toolUrl using reserved words for substitution" + } + }, + "additionalProperties": false, + "description": "Tool parameters object containing query parameters for substitution" }, "allowedApiCalls": { - "type": "string", - "description": "JSON string representation of an array of objects defining callbacks the tool is allowed to make to the Dataverse API" + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "A name the tool will use to identify this callback URL" + }, + "httpMethod": { + "type": "string", + "enum": ["GET", "POST", "PUT", "DELETE"], + "description": "Which HTTP method the specified callback uses" + }, + "urlTemplate": { + "type": "string", + "description": "The relative URL for the callback using reserved words to indicate where values should be dynamically substituted" + }, + "timeOut": { + "type": "integer", + "minimum": 1, + "description": "For non-public datasets and datafiles, how many minutes the signed URLs given to the tool should be valid for" + } + }, + "required": ["name", "httpMethod", "urlTemplate"], + "additionalProperties": false + }, + "description": "An array of objects defining callbacks the tool is allowed to make to the Dataverse API" }, "requirements": { - "type": "string", - "description": "JSON string representation of resources your tool needs to function" + "type": "object", + "properties": { + "auxFilesExist": { + "type": "array", + "items": { + "type": "object", + "properties": { + "formatTag": { + "type": "string", + "description": "The format tag of the required auxiliary file" + }, + 
"formatVersion": { + "type": "string", + "description": "The format version of the required auxiliary file" + } + }, + "required": ["formatTag", "formatVersion"], + "additionalProperties": false + }, + "description": "An array containing formatTag and formatVersion pairs for each auxiliary file that your tool needs to download to function properly" + } + }, + "additionalProperties": false, + "description": "Resources your tool needs to function. Currently, requirements only apply to preview tools" } }, "required": ["id", "displayName", "scope", "toolUrl"], From f301be9ec944c3b739ab9ef2ccdf852ad33d9f7a Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:32:11 +0200 Subject: [PATCH 7/9] Add libdbus-sys usage for Linux builds Added a conditional use statement for libdbus-sys on non-macOS and non-Windows targets to ensure the dependency is included for keyring support on Linux, even though it is not directly used in the code. --- src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 02b3a39..e5125d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,10 @@ #![warn(unused_crate_dependencies)] +// libdbus-sys is needed by keyring on Linux but not directly used +#[cfg(not(any(target_os = "macos", target_os = "windows")))] +use libdbus_sys as _; + /// Client functionality for interacting with Dataverse APIs pub mod client; From 181d99be9c62f02547448ca192d3df6a9eac15e9 Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:32:16 +0200 Subject: [PATCH 8/9] Update .gitignore to exclude dv/ and solr/ directories Added dv/ and solr/ to .gitignore to prevent these directories from being tracked by git. This helps keep the repository clean from generated or environment-specific files. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 8fd7651..9b77851 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ src/bin/dvtest.rs .idea/* .DS_Store */**/.DS_Store + +dv/ +solr/ \ No newline at end of file From 2796175f8fc5f84325ab0e8a6efea6eea087e21f Mon Sep 17 00:00:00 2001 From: Jan Range <30547301+JR-1991@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:34:33 +0200 Subject: [PATCH 9/9] Condense feature list formatting in Readme Simplified the Features section by merging individual feature descriptions into a concise bulleted list for improved readability. --- Readme.md | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/Readme.md b/Readme.md index ddbe633..10c373a 100644 --- a/Readme.md +++ b/Readme.md @@ -21,31 +21,12 @@ A comprehensive Rust library and command-line interface for interacting with the ## Features -**Dataverse Rust** provides complete coverage of core Dataverse operations through both a programmatic library interface and a full-featured CLI: - -### šŸ“š Collections - -Create, publish, and manage Dataverse collections with hierarchical organization support. - -### šŸ“Š Datasets - -Full dataset lifecycle management including creation, metadata editing, versioning, publishing, linking, and deletion. Support for dataset locks and review workflows. - -### šŸ“ Files - -Upload files via standard or direct upload (with parallel batch support), replace existing files, download files and complete datasets, and manage file metadata. - -### šŸ” Search - -Query datasets and files across your Dataverse instance with flexible search parameters. 
- -### šŸ› ļø Administration - -Manage storage drivers, configure external tools, and perform administrative operations. - -### ā„¹ļø Instance Information - -Retrieve version information and available metadata exporters from your Dataverse instance. +- **šŸ“š Collections** - Create, publish, and manage Dataverse collections with hierarchical organization support +- **šŸ“Š Datasets** - Full dataset lifecycle management including creation, metadata editing, versioning, publishing, linking, and deletion. Support for dataset locks and review workflows +- **šŸ“ Files** - Upload files via standard or direct upload (with parallel batch support), replace existing files, download files and complete datasets, and manage file metadata +- **šŸ” Search** - Query datasets and files across your Dataverse instance with flexible search parameters +- **šŸ› ļø Administration** - Manage storage drivers, configure external tools, and perform administrative operations +- **ā„¹ļø Instance Information** - Retrieve version information and available metadata exporters from your Dataverse instance ## Installation