# Workflow: Sync Files from Hydroshare to GitHub

name: Sync Files from Hydroshare to GitHub

# Runs on manual dispatch (optionally overriding the collection ID),
# on every push to main, and once a day on a schedule.
on:
  workflow_dispatch:
    inputs:
      hydro-collection-id:
        description: 'Hydroshare collection ID to sync from'
        required: false
        # Quoted so the hex ID can never be re-typed as a number.
        default: 'ac6cc75dcb0146cf9cc17a974f4bb08b'
  push:
    branches:
      - main
  schedule:
    # Daily at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 0 * * *'

permissions:
  # Needed by the final step, which commits and pushes synced files.
  contents: write
  # NOTE(review): no step in this workflow appears to touch pull
  # requests; confirm whether this permission can be dropped.
  pull-requests: write

jobs:
  sync_from_hs_to_github:
    strategy:
      matrix:
        # Quoted: version numbers must stay strings (3.10 would become 3.1).
        python-version: ['3.12']
        platform: [ubuntu-latest]
    runs-on: ${{ matrix.platform }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install hsclient

      - name: Copy Files from HydroShare
        env:
          HYDRO_USERNAME: ${{ secrets.HYDRO_USERNAME }}
          HYDRO_PASSWORD: ${{ secrets.HYDRO_PASSWORD }}
          # The dispatch input wins; other triggers fall back to the secret.
          HYDRO_COLLECTION_ID: ${{ github.event.inputs.hydro-collection-id || secrets.HYDRO_COLLECTION_ID }}
        working-directory: ${{ github.workspace }}
        # Run the script with the Python interpreter directly instead of
        # `python -c "..."`, so no shell quoting or expansion rules apply
        # to the code.
        shell: python
        run: |
          import datetime
          import os
          import subprocess

          from hsclient import HydroShare

          hs = HydroShare(os.getenv('HYDRO_USERNAME'), os.getenv('HYDRO_PASSWORD'))
          collection = hs.resource(os.getenv('HYDRO_COLLECTION_ID'))

          # Only resources updated within the last day are synced. The cutoff
          # is timezone-aware (UTC) so it compares correctly with the
          # 'Z'-suffixed timestamps regardless of the runner's local zone.
          cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1)

          for relation in collection.metadata.relations:
              # Skip relations that do not name a member of the collection.
              if relation.type.value != 'This resource includes':
                  continue

              # The resource ID is the last path segment of the relation value.
              resource_id = relation.value.split('/')[-1]
              resource = hs.resource(resource_id)

              try:
                  updated_string = resource.system_metadata()['date_last_updated']
              except (KeyError, NameError):
                  print(f'Resource {resource_id} does not have a date_last_updated field.')
                  continue

              # fromisoformat (Python 3.11+) accepts the 'Z' suffix and
              # timestamps with or without fractional seconds.
              try:
                  updated = datetime.datetime.fromisoformat(updated_string)
              except ValueError:
                  print(f'Resource {resource_id} has an unparseable date_last_updated: {updated_string!r}')
                  continue
              if updated.tzinfo is None:
                  # Assumes offset-less timestamps are UTC - TODO confirm.
                  updated = updated.replace(tzinfo=datetime.timezone.utc)

              if updated <= cutoff:
                  continue

              # Download the archive into the working directory, unpack it
              # over any existing files, then delete the archive.
              resource.download()
              zip_path = f'{resource_id}.zip'
              # Argument list (no shell) so the remotely supplied ID is never
              # interpreted by a shell; check=True fails the step on error.
              subprocess.run(['unzip', '-o', zip_path], check=True)
              os.remove(zip_path)

      - name: Commit and push the changes
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add .
          # Commit only when staging actually produced a change.
          if ! git diff --cached --quiet; then
            git commit -m "Sync files from HydroShare"
            git push
          else
            echo "No changes to commit"
          fi