diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 29a597bc13e..00000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,174 +0,0 @@ -version: 2.1 - -orbs: - slack: circleci/slack@4.2.1 - build-tools: circleci/build-tools@2.9.0 - -jobs: - test: - parameters: - machine-size: - type: string - default: large - build-type: - type: string - build-mysql: - type: string - default: "" - build-mariadb: - type: string - default: "" - build-postgresql: - type: string - default: "" - machine: - image: ubuntu-2004:202010-01 - resource_class: << parameters.machine-size >> - steps: - - run: - name: Halt builds except SBT test suite if there is no PR associated with the commit - command: | - if [[ -z "${CI_PULL_REQUEST}" ]] && [[ "${BUILD_TYPE}" != "sbt" ]] ; then - circleci-agent step halt - fi - - checkout - - run: - name: Custom step - configure GIT identity - command: | - git config user.email "circleci@example.com" - git config user.name "CircleCI" - - build-tools/merge-with-parent: - parent: develop - - restore_cache: - key: sbt-cache - - run: - command: src/ci/bin/test.sh - no_output_timeout: 1h - - run: - name: Do tricks to avoid unnecessary cache updates - command: | - find ~/.ivy2/cache -name "ivydata-*.properties" -print -delete - find ~/.sbt -name "*.lock" -print -delete - - store_test_results: - path: target/test-reports - - save_cache: - key: sbt-cache - paths: - - "~/.ivy2/cache" - - "~/.sbt" - environment: - CIRCLE_COMMIT_RANGE: << pipeline.git.base_revision >>...<< pipeline.git.revision >> - BUILD_TYPE: << parameters.build-type >> - BUILD_MYSQL: << parameters.build-mysql >> - BUILD_MARIADB: << parameters.build-mariadb >> - BUILD_POSTGRESQL: << parameters.build-postgresql >> - -workflows: - all-tests: - jobs: - - test: - name: testSbt - build-type: "sbt" - - test: - name: testSingleWorkflowRunner - build-type: "singleWorkflowRunner" - - test: - name: testDbms - build-type: "dbms" - - test: - name: testHoricromtalDeadlock - build-type: "horicromtalDeadlock" - - test: - name: testDockerScripts - build-type: "dockerScripts" - - test: - name: testReferenceDiskManifestBuilderApp - build-type: "referenceDiskManifestBuilderApp" - - test: - name: testCentaurAws - build-type: "centaurAws" - build-mysql: "5.7" - - test: - name: testCentaurDummy - build-type: "centaurDummy" - build-mysql: "5.7" - - test: - name: testCentaurEngineUpgradeLocal - build-type: "centaurEngineUpgradeLocal" - build-mysql: "5.7" - - test: - name: testCentaurEngineUpgradePapiV2alpha1 - build-type: "centaurEngineUpgradePapiV2alpha1" - build-mysql: "5.7" - - test: - name: testCentaurHoricromtalPapiV2alpha1 - build-type: "centaurHoricromtalPapiV2alpha1" - build-mysql: "5.7" - - test: - name: testCentaurHoricromtalPapiV2beta-MySQL - build-type: "centaurHoricromtalPapiV2beta" - build-mysql: "5.7" - - test: - name: testCentaurHoricromtalPapiV2beta-MariaDB - build-type: "centaurHoricromtalPapiV2beta" - build-mariadb: "10.3" - - test: - name: testCentaurHoricromtalEngineUpgradePapiV2alpha1-MySQL - build-type: "centaurHoricromtalEngineUpgradePapiV2alpha1" - build-mysql: "5.7" - - test: - name: testCentaurHoricromtalEngineUpgradePapiV2alpha1-MariaDB - build-type: "centaurHoricromtalEngineUpgradePapiV2alpha1" - build-mariadb: "10.3" - - test: - name: testCentaurPapiUpgradePapiV2alpha1 - build-type: "centaurPapiUpgradePapiV2alpha1" - build-mysql: "5.7" - - test: - name: testCentaurPapiUpgradeNewWorkflowsPapiV2alpha1 - build-type: "centaurPapiUpgradeNewWorkflowsPapiV2alpha1" - build-mysql: "5.7" - - 
test: - name: testCentaurLocal-MySQL - build-type: "centaurLocal" - build-mysql: "5.7" - - test: - name: testCentaurLocal-Postgresql - build-type: "centaurLocal" - build-postgresql: "11.3" - - test: - name: testCentaurPapiV2alpha1 - build-type: "centaurPapiV2alpha1" - build-mysql: "5.7" - - test: - name: testCentaurPapiV2beta - build-type: "centaurPapiV2beta" - build-mysql: "5.7" - - test: - name: testCentaurSlurm - build-type: "centaurSlurm" - build-mysql: "5.7" - - test: - name: testCentaurTes - build-type: "centaurTes" - build-mysql: "5.7" - - test: - name: testCentaurWdlUpgradeLocal - build-type: "centaurWdlUpgradeLocal" - build-mysql: "5.7" - - test: - name: testCheckPublish - build-type: "checkPublish" - build-mysql: "5.7" - - test: - name: testConformanceLocal - build-type: "conformanceLocal" - build-mysql: "5.7" - - test: - name: testConformancePapiV2beta - build-type: "conformancePapiV2beta" - build-mysql: "5.7" - - test: - name: testConformanceTesk - build-type: "conformanceTesk" - build-mysql: "5.7" diff --git a/.github/workflows/chart_update_on_merge.yml b/.github/workflows/chart_update_on_merge.yml index aa7a2c9c9ad..a2b14f2ec65 100644 --- a/.github/workflows/chart_update_on_merge.yml +++ b/.github/workflows/chart_update_on_merge.yml @@ -9,8 +9,14 @@ jobs: chart-update: name: Cromwhelm Chart Auto Updater if: github.event.pull_request.merged == true - runs-on: self-hosted # Faster machines; see https://github.com/broadinstitute/cromwell/settings/actions/runners + runs-on: ubuntu-latest steps: + - name: Fetch Jira ID from the commit message + id: fetch-jira-id + run: | + JIRA_ID=$(echo '${{ github.event.pull_request.title }}' | grep -Eo '[A-Z][A-Z]+-[0-9]+' | xargs echo -n | tr '[:space:]' ',') + [[ -z "$JIRA_ID" ]] && { echo "No Jira ID found in $1" ; exit 1; } + echo "JIRA_ID=$JIRA_ID" >> $GITHUB_OUTPUT - name: Clone Cromwell uses: actions/checkout@v2 with: @@ -49,6 +55,7 @@ jobs: username: dsdejenkins password: ${{ secrets.DSDEJENKINS_PASSWORD }} # Build & push `cromwell`, `womtool`, `cromiam`, and `cromwell-drs-localizer` + # This step is validated in the GHA 'docker_build_test.yml' without the accompanying docker push - name: Build Cromwell Docker run: | set -e @@ -68,7 +75,7 @@ jobs: repository: broadinstitute/terra-helmfile event-type: update-service client-payload: '{"service": "cromiam", "version": "${{ env.CROMWELL_VERSION }}", "dev_only": false}' - - name: Edit & push chart + - name: Edit & push cromwhelm chart env: BROADBOT_GITHUB_TOKEN: ${{ secrets.BROADBOT_GITHUB_TOKEN }} run: | @@ -76,10 +83,53 @@ jobs: cd cromwhelm git checkout main ls -la - sed -i "s/appVersion.*/appVersion: \"$CROMWELL_VERSION\"/" cromwell-helm/Chart.yaml - sed -i "s/image: broadinstitute\/cromwell.*/image: broadinstitute\/cromwell:$CROMWELL_VERSION/" cromwell-helm/templates/cromwell.yaml + sed -i "s|image: broadinstitute/cromwell:.*|image: broadinstitute/cromwell:$CROMWELL_VERSION|" terra-batch-libchart/values.yaml + git diff git config --global user.name "broadbot" git config --global user.email "broadbot@broadinstitute.org" - git commit -am "Auto update to Cromwell $CROMWELL_VERSION" + git commit -am "${{ steps.fetch-jira-id.outputs.JIRA_ID }}: Auto update to Cromwell $CROMWELL_VERSION" git push https://broadbot:$BROADBOT_GITHUB_TOKEN@github.com/broadinstitute/cromwhelm.git main + cd - + + - name: Clone terra-helmfile + uses: actions/checkout@v3 + with: + repository: broadinstitute/terra-helmfile + token: ${{ secrets.BROADBOT_GITHUB_TOKEN }} # Has to be set at checkout AND later when pushing 
to work + path: terra-helmfile + + - name: Update workflows-app in terra-helmfile + run: | + set -e + cd terra-helmfile + sed -i "s|image: broadinstitute/cromwell:.*|image: broadinstitute/cromwell:$CROMWELL_VERSION|" charts/workflows-app/values.yaml + cd - + + - name: Update cromwell-runner-app in terra-helmfile + run: | + set -e + cd terra-helmfile + sed -i "s|image: broadinstitute/cromwell:.*|image: broadinstitute/cromwell:$CROMWELL_VERSION|" charts/cromwell-runner-app/values.yaml + cd - + + + - name: Make PR in terra-helmfile + env: + BROADBOT_TOKEN: ${{ secrets.BROADBOT_GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.BROADBOT_GITHUB_TOKEN }} + run: | + set -e + JIRA_ID=${{ steps.fetch-jira-id.outputs.JIRA_ID }} + if [[ $JIRA_ID == "missing" ]]; then + echo "JIRA_ID missing, PR to terra-helmfile will not be created" + exit 0; + fi + cd terra-helmfile + git checkout -b ${JIRA_ID}-cromwell-update-$CROMWELL_VERSION + git config --global user.name "broadbot" + git config --global user.email "broadbot@broadinstitute.org" + git commit -am "${JIRA_ID}: Auto update Cromwell to $CROMWELL_VERSION in workflows-app and cromwell-runner-app" + git push -u origin ${JIRA_ID}-cromwell-update-$CROMWELL_VERSION + gh pr create --title "${JIRA_ID}: auto update Cromwell version to $CROMWELL_VERSION in workflows-app and cromwell-runner-app" --body "${JIRA_ID} helm chart update" --label "automerge" + cd - diff --git a/.github/workflows/consumer_contract_tests.yml b/.github/workflows/consumer_contract_tests.yml new file mode 100644 index 00000000000..0970e45e863 --- /dev/null +++ b/.github/workflows/consumer_contract_tests.yml @@ -0,0 +1,143 @@ +name: Consumer contract tests +# The purpose of this workflow is to run a suite of Cromwell contract tests against mock service provider(s) using Pact framework. +# +# More details about Contract Testing can be found in our handbook +# +# https://broadworkbench.atlassian.net/wiki/spaces/IRT/pages/2660368406/Getting+Started+with+Pact+Contract+Testing +# +# This workflow involves Cromwell as a consumer, and ANY provider (e.g. Sam) Cromwell consumes. +# Each party owns a set of tests (aka contract tests). +# +# Consumer contract tests (aka consumer tests) runs on a mock provider service and does not require a real provider service. +# Provider contract tests (aka provider verification tests) runs independently of any consumer. +# +# Specifically: +# Cromwell runs consumer tests against mock service. Upon success, publish consumer pacts to +# Pact Broker https://pact-broker.dsp-eng-tools.broadinstitute.org/. +# +# Pact Broker is the source of truth to forge contractual obligations between consumer and provider. +# +# This workflow meets the criteria of Pact Broker *Platinum* as described in https://docs.pact.io/pact_nirvana/step_6. +# The can-i-deploy job has been added to this workflow to support *Platinum* and gate the code for promotion to default branch. +# +# This is how it works. +# +# Consumer makes a change that results in a new pact published to Pact Broker. +# Pact Broker notifies provider(s) of the changed pact and trigger corresponding verification workflows. +# Provider downloads relevant versions of consumer pacts from Pact Broker and kicks off verification tests against the consumer pacts. +# Provider updates Pact Broker with verification status. +# Consumer kicks off can-i-deploy on process to determine if changes can be promoted and used for deployment. 
+# +# NOTE: The publish-contracts workflow will use the latest commit of the branch that triggers this workflow to publish the unique consumer contract version to Pact Broker. + +on: + pull_request: + paths-ignore: + - 'README.md' + push: + paths-ignore: + - 'README.md' + merge_group: + branches: + - develop + +jobs: + init-github-context: + runs-on: ubuntu-latest + outputs: + repo-branch: ${{ steps.extract-branch.outputs.repo-branch }} + repo-version: ${{ steps.extract-branch.outputs.repo-version }} + fork: ${{ steps.extract-branch.outputs.fork }} + + steps: + - uses: actions/checkout@v3 + + - name: Extract branch + id: extract-branch + run: | + GITHUB_EVENT_NAME=${{ github.event_name }} + if [[ "$GITHUB_EVENT_NAME" == "push" ]]; then + GITHUB_REF=${{ github.ref }} + GITHUB_SHA=${{ github.sha }} + elif [[ "$GITHUB_EVENT_NAME" == "pull_request" ]]; then + GITHUB_REF=refs/heads/${{ github.head_ref }} + GITHUB_SHA=${{ github.event.pull_request.head.sha }} + elif [[ "$GITHUB_EVENT_NAME" == "merge_group" ]]; then + GITHUB_REF=refs/heads/${{ github.head_ref }} + else + echo "Failed to extract branch information" + exit 1 + fi + echo "CURRENT_BRANCH=${GITHUB_REF/refs\/heads\//""}" >> $GITHUB_ENV + echo "CURRENT_SHA=$GITHUB_SHA" >> $GITHUB_ENV + + echo "repo-branch=${GITHUB_REF/refs\/heads\//""}" >> $GITHUB_OUTPUT + echo "repo-version=${GITHUB_SHA}" >> $GITHUB_OUTPUT + echo "fork=${FORK}" >> $GITHUB_OUTPUT + + - name: Is PR triggered by forked repo? + if: ${{ steps.extract-branch.outputs.fork == 'true' }} + run: | + echo "PR was triggered by forked repo" + + - name: Echo repo and branch information + run: | + echo "repo-owner=${{ github.repository_owner }}" + echo "repo-name=${{ github.event.repository.name }}" + echo "repo-branch=${{ steps.extract-branch.outputs.repo-branch }}" + echo "repo-version=${{ steps.extract-branch.outputs.repo-version }}" + echo "fork=${{ steps.extract-branch.outputs.fork }}" + + cromwell-consumer-contract-tests: + runs-on: ubuntu-latest + needs: [init-github-context] + outputs: + pact-b64: ${{ steps.encode-pact.outputs.pact-b64 }} + + steps: + - uses: actions/checkout@v3 + - name: Run consumer tests + run: | + docker run --rm -v $PWD:/working \ + -v jar-cache:/root/.ivy \ + -v jar-cache:/root/.ivy2 \ + -w /working \ + sbtscala/scala-sbt:openjdk-17.0.2_1.7.2_2.13.10 \ + sbt "project pact4s" clean test + + - name: Output consumer contract as non-breaking base64 string + id: encode-pact + run: | + cd pact4s + NON_BREAKING_B64=$(cat target/pacts/cromwell-consumer-drshub-provider.json | base64 -w 0) + echo "pact-b64=${NON_BREAKING_B64}" >> $GITHUB_OUTPUT + + # Prevent untrusted sources from using PRs to publish contracts + # since access to secrets is not allowed. 
+ publish-contracts: + runs-on: ubuntu-latest + if: ${{ needs.init-github-context.outputs.fork == 'false' || needs.init-github-context.outputs.fork == ''}} + needs: [init-github-context, cromwell-consumer-contract-tests] + steps: + - name: Dispatch to terra-github-workflows + uses: broadinstitute/workflow-dispatch@v3 + with: + workflow: .github/workflows/publish-contracts.yaml + repo: broadinstitute/terra-github-workflows + ref: refs/heads/main + token: ${{ secrets.BROADBOT_GITHUB_TOKEN }} # github token for access to kick off a job in the private repo + inputs: '{ "pact-b64": "${{ needs.cromwell-consumer-contract-tests.outputs.pact-b64 }}", "repo-owner": "${{ github.repository_owner }}", "repo-name": "${{ github.event.repository.name }}", "repo-branch": "${{ needs.init-github-context.outputs.repo-branch }}" }' + + can-i-deploy: + runs-on: ubuntu-latest + if: ${{ needs.init-github-context.outputs.fork == 'false' || needs.init-github-context.outputs.fork == ''}} + needs: [ init-github-context, publish-contracts ] + steps: + - name: Dispatch to terra-github-workflows + uses: broadinstitute/workflow-dispatch@v3 + with: + workflow: .github/workflows/can-i-deploy.yaml + repo: broadinstitute/terra-github-workflows + ref: refs/heads/main + token: ${{ secrets.BROADBOT_GITHUB_TOKEN }} # github token for access to kick off a job in the private repo + inputs: '{ "pacticipant": "cromwell-consumer", "version": "${{ needs.init-github-context.outputs.repo-version }}" }' diff --git a/.github/workflows/cromwell_unit_tests.yml b/.github/workflows/cromwell_unit_tests.yml index d0927f8b954..88951871d8f 100644 --- a/.github/workflows/cromwell_unit_tests.yml +++ b/.github/workflows/cromwell_unit_tests.yml @@ -9,6 +9,7 @@ run-name: ${{ github.actor }} running Cromwell sbt unit tests. on: workflow_dispatch: #Manual trigger from GitHub UI push: + merge_group: permissions: contents: read @@ -27,6 +28,10 @@ jobs: #Invoke SBT to run all unit tests for Cromwell. - name: Run tests + env: + AZURE_CLIENT_ID: ${{ secrets.VAULT_AZURE_CENTAUR_CLIENT_ID }} + AZURE_CLIENT_SECRET: ${{ secrets.VAULT_AZURE_CENTAUR_CLIENT_SECRET }} + AZURE_TENANT_ID: ${{ secrets.VAULT_AZURE_CENTAUR_TENANT_ID }} run: | set -e sbt "test" diff --git a/.github/workflows/docker_build_test.yml b/.github/workflows/docker_build_test.yml new file mode 100644 index 00000000000..01c2ea502c9 --- /dev/null +++ b/.github/workflows/docker_build_test.yml @@ -0,0 +1,37 @@ +name: 'Docker Build Test' + +# This test verifies that we can successfully build the same docker images that we release. +# Includes `cromwell`, `womtool`, `cromiam`, and `cromwell-drs-localizer` +# See chart_update_on_merge.yml for the actual release workflow. + +run-name: ${{ github.actor }} Docker Build Test + +on: + workflow_dispatch: + push: + merge_group: + +permissions: + contents: read + +jobs: + sbt-build: + name: sbt docker build + runs-on: ubuntu-latest + steps: + - name: Clone Cromwell + uses: actions/checkout@v2 + with: + repository: broadinstitute/cromwell + token: ${{ secrets.BROADBOT_GITHUB_TOKEN }} + path: cromwell + - uses: olafurpg/setup-scala@v10 + with: + java-version: adopt@1.11 + # The following invocation should be as similar as possible to the one in chart_update_on_merge.yml + # To state the obvious: This test should not publish anything. It should simply verify that the build completes. 
+ - name: Build Cromwell Docker + run: | + set -e + cd cromwell + sbt -Dproject.isSnapshot=false -Dproject.isRelease=false docker diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml new file mode 100644 index 00000000000..ebafe51064c --- /dev/null +++ b/.github/workflows/integration_tests.yml @@ -0,0 +1,117 @@ +name: 'Integration Tests' + +#This github action runs all of Cromwell's integration tests. + +# This is what shows up in the github workflows page as the title. Using github ternary syntax & format() function. +run-name: ${{ github.event_name == 'schedule' && 'Nightly Integration Testing' || format('{0} Integration Testing', github.actor) }} + +#What will trigger the workflow to run. +on: + workflow_dispatch: #Manual trigger from GitHub UI + push: + schedule: + - cron: '0 0 * * 1-5' + merge_group: + +permissions: + contents: read + +jobs: + integration-tests: + strategy: + fail-fast: false #disabling fail-fast means that even if one test fails, the others will still try to complete. + #Each entry below is a single integration test that lives in /src/ci/bin/. + #Each will be launched on its own runner so they can occur in parallel. + #Friendly names are displayed on the Github UI and aren't used anywhere else. + matrix: + # Batch test fixes to land later + include: + - build_type: centaurGcpBatch + build_mysql: 5.7 + friendly_name: Centaur GCP Batch with MySQL 5.7 + - build_type: centaurPapiV2beta + build_mysql: 5.7 + friendly_name: Centaur Papi V2 Beta with MySQL 5.7 + - build_type: dbms + friendly_name: DBMS + - build_type: centaurTes + build_mysql: 5.7 + friendly_name: Centaur TES with MySQL 5.7 + - build_type: centaurLocal + build_mysql: 5.7 + friendly_name: Centaur Local with MySQL 5.7 + - build_type: checkPublish + friendly_name: Check Publish + - build_type: centaurAws + build_mysql: 5.7 + friendly_name: Centaur AWS with MySQL 5.7 + - build_type: centaurDummy + build_mysql: 5.7 + friendly_name: Centaur Dummy with MySQL 5.7 + - build_type: centaurHoricromtalPapiV2beta + build_mysql: 5.7 + friendly_name: Centaur Horicromtal PapiV2 Beta with MySQL 5.7 + - build_type: horicromtalDeadlock + friendly_name: Horicromtal Deadlock + - build_type: singleWorkflowRunner + friendly_name: Single Workflow Runner + - build_type: centaurLocal + build_mariadb: 10.3 + friendly_name: Centaur Local with MariaDB 10.3 + - build_type: centaurLocal + build_postgresql: 11.3 + friendly_name: Centaur Local with PostgreSQL 11.3 + - build_type: centaurEngineUpgradeLocal + build_mysql: 5.7 + friendly_name: Centaur Engine Upgrade Local with MySQL 5.7 + - build_type: referenceDiskManifestBuilderApp + friendly_name: Reference Disk Manifest Builder App + - build_type: centaurSlurm + build_mysql: 5.7 + friendly_name: "Centaur Slurm with MySQL 5.7" + - build_type: centaurBlob + build_mysql: 5.7 + friendly_name: Centaur Blob + name: ${{ matrix.friendly_name }} + env: + BUILD_NAME: ${{ matrix.build_type }} + BUILD_TYPE: ${{ matrix.build_type }} #intentionally duplicated variable + BUILD_MYSQL: ${{ matrix.build_mysql }} + BUILD_POSTGRESQL: ${{ matrix.build_postgresql }} + BUILD_MARIADB: ${{ matrix.build_mariadb }} + VAULT_ROLE_ID: ${{ secrets.VAULT_ROLE_ID_CI }} + VAULT_SECRET_ID: ${{ secrets.VAULT_SECRET_ID_CI }} + AZURE_CLIENT_ID: ${{ secrets.VAULT_AZURE_CENTAUR_CLIENT_ID }} + AZURE_CLIENT_SECRET: ${{ secrets.VAULT_AZURE_CENTAUR_CLIENT_SECRET }} + AZURE_TENANT_ID: ${{ secrets.VAULT_AZURE_CENTAUR_TENANT_ID }} + runs-on: ubuntu-latest + timeout-minutes: 120 + steps: + - 
uses: actions/checkout@v3 # checkout the cromwell repo + with: + ref: ${{ inputs.target-branch }} + - uses: ./.github/set_up_cromwell_action #This github action will set up git-secrets, caching, java, and sbt. + with: + cromwell_repo_token: ${{ secrets.BROADBOT_GITHUB_TOKEN }} + #This script bascially just looks up another script to run, assuming that the other script's filename is: + #src/ci/bin/test${BUILD_TYPE}.sh. The first letter of the BUILD_TYPE is automatically capitalized when looking. + - name: Run Integration Test + shell: 'script -q -e -c "bash --noprofile --norc -eo pipefail {0}"' #See comment below + run: | + set -e + echo Running test.sh + ./src/ci/bin/test.sh + # always() is some github magic that forces the following step to run, even when the previous fails. + # Without it, the if statement won't be evaluated on a test failure. + - uses: ravsamhq/notify-slack-action@v2 + if: always() && github.ref == 'refs/heads/develop' #only report on failures against develop. + with: + status: ${{ job.status }} + notify_when: "failure" + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + #The "shell: ..."" line is a way to force the Github Action Runner to use a bash shell that thinks it has a TTY. + #The issue and solution are described here: https://github.com/actions/runner/issues/241#issuecomment-842566950 + #This is only needed for ReferenceDiskManifestBuilderApp test. + #This test uses fancy colors in the output, which likely causes the problem. + #See WX-938. diff --git a/.github/workflows/make_publish_prs.yml b/.github/workflows/make_publish_prs.yml index ced80154c80..e4e98a7f2f0 100644 --- a/.github/workflows/make_publish_prs.yml +++ b/.github/workflows/make_publish_prs.yml @@ -16,7 +16,7 @@ on: jobs: make-firecloud-develop-pr: name: Create firecloud-develop PR - runs-on: self-hosted # Faster machines; see https://github.com/broadinstitute/cromwell/settings/actions/runners + runs-on: ubuntu-latest steps: - name: Clone firecloud-develop uses: actions/checkout@v2 @@ -50,7 +50,7 @@ jobs: git config --global user.email "broadbot@broadinstitute.org" git commit -m "Updating Cromwell version to ${NEW_CROMWELL_V}" git push https://broadbot:$BROADBOT_GITHUB_TOKEN@github.com/broadinstitute/firecloud-develop.git ${NEW_BRANCH_NAME} - echo ::set-output name=NEW_BRANCH_NAME::${NEW_BRANCH_NAME} + echo "NEW_BRANCH_NAME=${NEW_BRANCH_NAME}" >> $GITHUB_OUTPUT - name: Create firecloud-develop PR uses: actions/github-script@v6 with: @@ -70,4 +70,3 @@ jobs: 'It updates cromwell from version ${{ github.event.inputs.old_cromwell_version }} to ${{ github.event.inputs.new_cromwell_version }}.' 
].join('\n') }); - diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 0590d48ef53..b005da65041 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -46,7 +46,7 @@ jobs: # export image name from the log image=$(grep 'Tagging image' build.log | awk '{print $NF}') - echo "::set-output name=image::${image}" + echo "image=${image}" >> $GITHUB_OUTPUT # scan the image - uses: broadinstitute/dsp-appsec-trivy-action@v1 diff --git a/.github/workflows/validate_pr_name.yml b/.github/workflows/validate_pr_name.yml new file mode 100644 index 00000000000..db26bbd95c6 --- /dev/null +++ b/.github/workflows/validate_pr_name.yml @@ -0,0 +1,23 @@ +# A github action to validate the name of a pull request contains a Jira tag: + +name: Validate PR name + +on: + pull_request: + types: [opened, edited, synchronize] + +jobs: + validate_pr_name: + runs-on: ubuntu-latest + steps: + - name: Validate PR title + id: validate + uses: actions/github-script@v3 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const title = context.payload.pull_request.title; + const regex = /[A-Z][A-Z]+-\d+/; + if (!regex.test(title)) { + core.setFailed("PR title must contain a Jira tag"); + } diff --git a/.gitignore b/.gitignore index 571a12c5873..a5b72f6b263 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,12 @@ tags target /site +#from running integration tests locally +actual.json +console_output.txt +expected.json +run_mode_metadata.json + # custom config cromwell-executions cromwell-test-executions @@ -38,7 +44,6 @@ cromwell-service-account.json cwl_conformance_test.inputs.json dockerhub_provider_config_v1.inc.conf dockerhub_provider_config_v2.inc.conf -github_private_deploy_key papi_application.inc.conf papi_refresh_token.options.json papi_v2_gcsa.options.json diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9ffb48b1147..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,88 +0,0 @@ -os: linux -dist: focal -services: - - docker -language: minimal -git: - depth: false -cache: - directories: - - $HOME/.ivy2/cache - - $HOME/.coursier/cache - # see cromwell::private::delete_sbt_boot for more info - #- $HOME/.sbt/boot/ -before_cache: - # Tricks to avoid unnecessary cache updates - - find $HOME/.ivy2/cache -name "ivydata-*.properties" -print -delete - - find $HOME/.coursier/cache -name "ivydata-*.properties" -print -delete - - find $HOME/.sbt -name "*.lock" -print -delete -env: - jobs: - # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked - - >- - BUILD_TYPE=centaurAws - BUILD_MYSQL=5.7 - - >- - BUILD_TYPE=centaurDummy - BUILD_MYSQL=5.7 - - >- - BUILD_TYPE=centaurEngineUpgradeLocal - BUILD_MYSQL=5.7 - # Temporarily keeping until `centaurEngineUpgradePapiV2beta` or similar exists - - >- - BUILD_TYPE=centaurEngineUpgradePapiV2alpha1 - BUILD_MYSQL=5.7 - - >- - BUILD_TYPE=centaurHoricromtalPapiV2beta - BUILD_MYSQL=5.7 - - >- - BUILD_TYPE=centaurLocal - BUILD_MARIADB=10.3 - - >- - BUILD_TYPE=centaurLocal - BUILD_MYSQL=5.7 - - >- - BUILD_TYPE=centaurLocal - BUILD_POSTGRESQL=11.3 - - >- - BUILD_TYPE=centaurPapiV2beta - BUILD_MYSQL=5.7 - - >- - BUILD_TYPE=centaurSlurm - BUILD_MYSQL=5.7 - - >- - BUILD_TYPE=centaurTes - BUILD_MYSQL=5.7 - - >- - BUILD_TYPE=checkPublish - - >- - BUILD_TYPE=horicromtalDeadlock - - >- - BUILD_TYPE=sbt - BUILD_SBT_INCLUDE=engine - - >- - BUILD_TYPE=sbt - BUILD_SBT_INCLUDE=server - - >- - BUILD_TYPE=sbt - BUILD_SBT_INCLUDE=services - - >- - BUILD_TYPE=sbt - 
BUILD_SBT_EXCLUDE='engine|server|services' - - >- - BUILD_TYPE=dbms - - >- - BUILD_TYPE=singleWorkflowRunner - - >- - BUILD_TYPE=referenceDiskManifestBuilderApp -script: - - src/ci/bin/test.sh -notifications: - slack: - rooms: - - secure: B5KYcnhk/ujAUWlHsjzP7ROLm6MtYhaGikdYf6JYINovhMbVKnZCTlZEy7rqT3L2T5uJ25iefD500VQGk1Gn7puQ1sNq50wqjzQaj20PWEiBwoWalcV/nKBcQx1TyFT13LJv8fbFnVPxFCkC3YXoHedx8qAhDs8GH/tT5J8XOC8= - template: - - "Build <%{build_url}|#%{build_number}> (<%{compare_url}|%{commit}>) of %{repository}@%{branch} by %{author} %{result} in %{duration}" - on_success: change - on_failure: change - on_pull_requests: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 0084c05171b..a581852c02e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Cromwell Change Log +## 86 Release Notes + +### GCP Batch +Cromwell now supports the GCP Batch backend for running workflows. See `Backend` in [ReadTheDocs](https://cromwell.readthedocs.io/en/stable/) for more information. + +### Workflow Completion Callback +Cromwell can be configured to send a POST request to a specified URL when a workflow completes. The request body includes the workflow ID, terminal state, +and (if applicable) final outputs or error message. See `WorkflowCallback` in [ReadTheDocs](https://cromwell.readthedocs.io/en/stable/) for more information. + +### Other Improvements +* Cromwell will now parallelize the downloads of DRS files that resolve to signed URLs. This significantly reduces the time localization takes in certain situations. +* WDL size engine function now works for HTTP files +* Improved Cromwell's handling of docker manifests. Additional logging information is emitted, and Cromwell will fall back to using OCI manifests if it encounters an error with a Docker Image Manifest V2. + ## 85 Release Notes ### Migration of PKs to BIGINT diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 00000000000..34ece8d7792 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,5 @@ +# These owners will be the default owners for everything in +# the repo. Unless a later match takes precedence, +# @broadinstitute/dsp-batch will be requested for +# review when someone opens a pull request. +* @broadinstitute/dsp-batch diff --git a/README.md b/README.md index fac1541cf8e..f6e3f8e742e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ -[![Build Status](https://travis-ci.com/broadinstitute/cromwell.svg?branch=develop)](https://travis-ci.com/broadinstitute/cromwell?branch=develop) [![codecov](https://codecov.io/gh/broadinstitute/cromwell/branch/develop/graph/badge.svg)](https://codecov.io/gh/broadinstitute/cromwell) ## Welcome to Cromwell diff --git a/azure-blob-nio/README.md b/azure-blob-nio/README.md new file mode 100644 index 00000000000..ad6c553eabf --- /dev/null +++ b/azure-blob-nio/README.md @@ -0,0 +1,5 @@ +# Azure Storage Blob NIO FileSystemProvider + +[This is a copy of the NIO Filesystem implementation version 12.0.0-beta.19](https://github.com/Azure/azure-sdk-for-java/tree/2490e1e19e8531fe0a6378f40e299e7ec64cf3aa/sdk/storage/azure-storage-blob-nio) + +For more information on the initial design and commit history see the Azure SDK repository linked above. Changes to this repo were necessary to support some of the specific needs Cromwell as an App on Azure has as a system in Terra. This is something that has some precedent as it has been done for other filesystems in the past. 
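The attribute classes added below (for example `AzureBasicFileAttributes`) are consumed through the standard NIO API. The following is a minimal, illustrative sketch only (not part of this diff), assuming `blobPath` was obtained from an already-open `AzureFileSystem` (the provider's `azb://` scheme):

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import com.azure.storage.blob.nio.AzureBasicFileAttributes;

class BlobAttributesExample {
    // blobPath is assumed to come from a FileSystem opened with this provider.
    static void describe(Path blobPath) throws IOException {
        // Read the "azureBasic" attribute subset defined in this package.
        AzureBasicFileAttributes attrs = Files.readAttributes(blobPath, AzureBasicFileAttributes.class);
        if (attrs.isVirtualDirectory()) {
            // Per the javadocs below, most attributes are null for virtual directories.
            System.out.println(blobPath + " is a virtual directory");
        } else {
            System.out.println(blobPath + ": " + attrs.size() + " bytes, modified " + attrs.lastModifiedTime());
        }
    }
}
```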
diff --git a/azure-blob-nio/assets.json b/azure-blob-nio/assets.json new file mode 100644 index 00000000000..c262f7ebafc --- /dev/null +++ b/azure-blob-nio/assets.json @@ -0,0 +1,6 @@ +{ + "AssetsRepo": "Azure/azure-sdk-assets", + "AssetsRepoPrefixPath": "java", + "TagPrefix": "java/storage/azure-storage-blob-nio", + "Tag": "java/storage/azure-storage-blob-nio_b2a0ce219e" +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBasicFileAttributeView.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBasicFileAttributeView.java new file mode 100644 index 00000000000..43744893ccb --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBasicFileAttributeView.java @@ -0,0 +1,69 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import com.azure.core.util.logging.ClientLogger; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributeView; +import java.nio.file.attribute.FileTime; + +/** + * Provides support for basic file attributes. + *
<p>
+ * The operations supported by this view and the attributes it reads are a strict subset of + * {@link AzureBlobFileAttributeView} and has the same network behavior. Therefore, while this type is offered for + * compliance with the NIO spec, {@link AzureBlobFileAttributeView} is generally preferred. + *
<p>
+ * {@link #setTimes(FileTime, FileTime, FileTime)} is not supported. + */ +public final class AzureBasicFileAttributeView implements BasicFileAttributeView { + private static final ClientLogger LOGGER = new ClientLogger(AzureBasicFileAttributeView.class); + + static final String NAME = "azureBasic"; + + private final Path path; + + AzureBasicFileAttributeView(Path path) { + this.path = path; + } + + /** + * Returns the name of the attribute view: {@code "azureBasic"} + * + * @return the name of the attribute view: {@code "azureBasic"} + */ + @Override + public String name() { + return NAME; + } + + /** + * Reads the basic file attributes as a bulk operation. + *
<p>
+ * All file attributes are read as an atomic operation with respect to other file system operations. + * + * @return {@link AzureBasicFileAttributes} + */ + @Override + public AzureBasicFileAttributes readAttributes() throws IOException { + AzurePath.ensureFileSystemOpen(path); + return new AzureBasicFileAttributes(path); + } + + /** + * Unsupported. + * + * @param lastModifiedTime the new last modified time, or null to not change the value + * @param lastAccessTime the last access time, or null to not change the value + * @param createTime the file's create time, or null to not change the value + * @throws UnsupportedOperationException Operation not supported. + * @throws IOException never + */ + @Override + public void setTimes(FileTime lastModifiedTime, FileTime lastAccessTime, FileTime createTime) throws IOException { + throw LoggingUtility.logError(LOGGER, new UnsupportedOperationException()); + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBasicFileAttributes.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBasicFileAttributes.java new file mode 100644 index 00000000000..d1ab6d28562 --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBasicFileAttributes.java @@ -0,0 +1,165 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.FileTime; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * Provides support for basic file attributes. + *
<p>
+ * The properties available on this type are a strict subset of {@link AzureBlobFileAttributes}, and the two types have + * the same network behavior. Therefore, while this type is offered for compliance with the NIO spec, + * {@link AzureBlobFileAttributes} is generally preferred. + *
<p>
+ * Some attributes are not supported. Refer to the javadocs on each method for more information. + *
<p>
+ * If the target file is a virtual directory, most attributes will be set to null. + */ +public final class AzureBasicFileAttributes implements BasicFileAttributes { + // For verifying parameters on FileSystemProvider.readAttributes + static final Set ATTRIBUTE_STRINGS; + static { + Set set = new HashSet<>(); + set.add("lastModifiedTime"); + set.add("isRegularFile"); + set.add("isDirectory"); + set.add("isVirtualDirectory"); + set.add("isSymbolicLink"); + set.add("isOther"); + set.add("size"); + set.add("creationTime"); + ATTRIBUTE_STRINGS = Collections.unmodifiableSet(set); + } + + private final AzureBlobFileAttributes internalAttributes; + + /* + In order to support Files.exist() and other methods like Files.walkFileTree() which depend on it, we have had to add + support for virtual directories. This is not ideal as customers will have to now perform null checks when inspecting + attributes (or at least check if it is a virtual directory before inspecting properties). It also incurs extra + network requests as we have to call a checkDirectoryExists() after receiving the initial 404. This is two + additional network requests, though they only happen in the case when a file doesn't exist or is virtual, so it + shouldn't happen in the majority of api calls. + */ + AzureBasicFileAttributes(Path path) throws IOException { + this.internalAttributes = new AzureBlobFileAttributes(path); + } + + /** + * Returns the time of last modification or null if this is a virtual directory. + * + * @return the time of last modification or null if this is a virtual directory + */ + @Override + public FileTime lastModifiedTime() { + return this.internalAttributes.lastModifiedTime(); + } + + /** + * Returns the time of last modification or null if this is a virtual directory + *
<p>
+ * Last access time is not supported by the blob service. In this case, it is typical for implementations to return + * the {@link #lastModifiedTime()}. + * + * @return the time of last modification or null if this is a virtual directory + */ + @Override + public FileTime lastAccessTime() { + return this.internalAttributes.lastAccessTime(); + } + + /** + * Returns the creation time. The creation time is the time that the file was created. Returns null if this is a + * virtual directory. + * + * @return The creation time or null if this is a virtual directory + */ + @Override + public FileTime creationTime() { + return this.internalAttributes.creationTime(); + } + + /** + * Tells whether the file is a regular file with opaque content. + * + * @return whether the file is a regular file. + */ + @Override + public boolean isRegularFile() { + return this.internalAttributes.isRegularFile(); + } + + /** + * Tells whether the file is a directory. + *
<p>
+ * Will only return true if the directory is a concrete directory. See + * {@link AzureFileSystemProvider#createDirectory(Path, FileAttribute[])} for more information on virtual and + * concrete directories. + * + * @return whether the file is a directory + */ + @Override + public boolean isDirectory() { + return this.internalAttributes.isDirectory(); + } + + /** + * Tells whether the file is a virtual directory. + *
<p>
+ * See {@link AzureFileSystemProvider#createDirectory(Path, FileAttribute[])} for more information on virtual and + * concrete directories. + * + * @return whether the file is a virtual directory + */ + public boolean isVirtualDirectory() { + return this.internalAttributes.isVirtualDirectory(); + } + + /** + * Tells whether the file is a symbolic link. + * + * @return false. Symbolic links are not supported. + */ + @Override + public boolean isSymbolicLink() { + return this.internalAttributes.isSymbolicLink(); + } + + /** + * Tells whether the file is something other than a regular file, directory, or symbolic link. + * + * @return false. No other object types are supported. + */ + @Override + public boolean isOther() { + return this.internalAttributes.isOther(); + } + + /** + * Returns the size of the file (in bytes). + * + * @return the size of the file + */ + @Override + public long size() { + return this.internalAttributes.size(); + } + + /** + * Returns the url of the resource. + * + * @return The file key, which is the url. + */ + @Override + public Object fileKey() { + return this.internalAttributes.fileKey(); + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBlobFileAttributeView.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBlobFileAttributeView.java new file mode 100644 index 00000000000..d9366e22417 --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBlobFileAttributeView.java @@ -0,0 +1,157 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import com.azure.core.util.logging.ClientLogger; +import com.azure.storage.blob.models.AccessTier; +import com.azure.storage.blob.models.BlobHttpHeaders; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.specialized.BlobClientBase; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributeView; +import java.nio.file.attribute.FileTime; +import java.util.HashMap; +import java.util.Map; +import java.util.function.Consumer; + +/** + * A file attribute view that provides a view of attributes specific to files stored as blobs in Azure Storage. + *
<p>
+ * All attributes are retrieved from the file system as a bulk operation. + *
<p>
+ * {@link #setTimes(FileTime, FileTime, FileTime)} is not supported. + */ +public final class AzureBlobFileAttributeView implements BasicFileAttributeView { + private static final ClientLogger LOGGER = new ClientLogger(AzureBlobFileAttributeView.class); + + static final String ATTR_CONSUMER_ERROR = "Exception thrown by attribute consumer"; + static final String NAME = "azureBlob"; + + private final Path path; + + AzureBlobFileAttributeView(Path path) { + this.path = path; + } + + @SuppressWarnings("unchecked") + static Map> setAttributeConsumers(AzureBlobFileAttributeView view) { + Map> map = new HashMap<>(); + map.put("blobHttpHeaders", obj -> { + try { + view.setBlobHttpHeaders((BlobHttpHeaders) obj); + } catch (IOException e) { + throw LoggingUtility.logError(LOGGER, new UncheckedIOException(ATTR_CONSUMER_ERROR, e)); + } + }); + map.put("metadata", obj -> { + try { + Map m = (Map) obj; + if (m == null) { + throw LoggingUtility.logError(LOGGER, new ClassCastException()); + } + view.setMetadata(m); + } catch (IOException e) { + throw LoggingUtility.logError(LOGGER, new UncheckedIOException(ATTR_CONSUMER_ERROR, e)); + } + }); + map.put("tier", obj -> { + try { + view.setTier((AccessTier) obj); + } catch (IOException e) { + throw LoggingUtility.logError(LOGGER, new UncheckedIOException(ATTR_CONSUMER_ERROR, e)); + } + }); + + return map; + } + + /** + * Returns the name of the attribute view: {@code "azureBlob"} + * + * @return the name of the attribute view: {@code "azureBlob"} + */ + @Override + public String name() { + return NAME; + } + + /** + * Reads the file attributes as a bulk operation. + *
<p>
+ * All file attributes are read as an atomic operation with respect to other file system operations. A fresh copy is + * retrieved every time this method is called. + * @return {@link AzureBlobFileAttributes} + * @throws IOException if an IOException occurs. + */ + @Override + public AzureBlobFileAttributes readAttributes() throws IOException { + AzurePath.ensureFileSystemOpen(path); + return new AzureBlobFileAttributes(path); + } + + /** + * Sets the {@link BlobHttpHeaders} as an atomic operation. + *
<p>
+ * See {@link BlobClientBase#setHttpHeaders(BlobHttpHeaders)} for more information. + * @param headers {@link BlobHttpHeaders} + * @throws IOException if an IOException occurs. + */ + public void setBlobHttpHeaders(BlobHttpHeaders headers) throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + new AzureResource(this.path).getBlobClient().setHttpHeaders(headers); + } catch (BlobStorageException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + /** + * Sets the metadata as an atomic operation. + *
<p>
+ * See {@link BlobClientBase#setMetadata(Map)} for more information. + * @param metadata The metadata to associate with the blob + * @throws IOException if an IOException occurs. + */ + public void setMetadata(Map metadata) throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + new AzureResource(this.path).getBlobClient().setMetadata(metadata); + } catch (BlobStorageException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + /** + * Sets the {@link AccessTier} on the file. + *
<p>
+ * See {@link BlobClientBase#setAccessTier(AccessTier)} for more information. + * @param tier {@link AccessTier} + * @throws IOException if an IOException occurs. + */ + public void setTier(AccessTier tier) throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + new AzureResource(this.path).getBlobClient().setAccessTier(tier); + } catch (BlobStorageException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + /** + * Unsupported. + * + * @param lastModifiedTime the new last modified time, or null to not change the value + * @param lastAccessTime the last access time, or null to not change the value + * @param createTime the file's create time, or null to not change the value + * @throws UnsupportedOperationException Operation not supported. + * @throws IOException never + */ + @Override + public void setTimes(FileTime lastModifiedTime, FileTime lastAccessTime, FileTime createTime) throws IOException { + throw LoggingUtility.logError(LOGGER, new UnsupportedOperationException()); + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBlobFileAttributes.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBlobFileAttributes.java new file mode 100644 index 00000000000..c73d062e117 --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureBlobFileAttributes.java @@ -0,0 +1,369 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.FileTime; +import java.time.OffsetDateTime; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.function.Supplier; + +import com.azure.core.util.logging.ClientLogger; +import com.azure.storage.blob.models.AccessTier; +import com.azure.storage.blob.models.ArchiveStatus; +import com.azure.storage.blob.models.BlobHttpHeaders; +import com.azure.storage.blob.models.BlobProperties; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.models.BlobType; +import com.azure.storage.blob.models.CopyStatusType; + +/** + * Provides support for attributes associated with a file stored as a blob in Azure Storage. + *
<p>
+ * Some of the attributes inherited from {@link BasicFileAttributes} are not supported. See the docs on each method for + * more information. + *
<p>
+ * If the target file is a virtual directory, most attributes will be set to null. + */ +public final class AzureBlobFileAttributes implements BasicFileAttributes { + /* + Some blob properties do not have getters as they do not make sense in the context of nio. These properties are: + - incremental snapshot related properties (only for page blobs) + - lease related properties (leases not currently supported) + - sequence number (only for page blobs) + - encryption key sha256 (cpk not supported) + - committed block count (only for append blobs) + */ + + private static final ClientLogger LOGGER = new ClientLogger(AzureBlobFileAttributes.class); + + private final BlobProperties properties; + private final AzureResource resource; + private final boolean isVirtualDirectory; + + AzureBlobFileAttributes(Path path) throws IOException { + this.resource = new AzureResource(path); + BlobProperties props = null; + try { + props = resource.getBlobClient().getProperties(); + } catch (BlobStorageException e) { + if (e.getStatusCode() == 404 && this.resource.checkVirtualDirectoryExists()) { + this.isVirtualDirectory = true; + this.properties = null; + return; + } else { + throw LoggingUtility.logError(LOGGER, new IOException("Path: " + path.toString(), e)); + } + } + this.properties = props; + this.isVirtualDirectory = false; + } + + static Map> getAttributeSuppliers(AzureBlobFileAttributes attributes) { + Map> map = new HashMap<>(); + map.put("creationTime", attributes::creationTime); + map.put("lastModifiedTime", attributes::lastModifiedTime); + map.put("eTag", attributes::eTag); + map.put("blobHttpHeaders", attributes::blobHttpHeaders); + map.put("blobType", attributes::blobType); + map.put("copyId", attributes::copyId); + map.put("copyStatus", attributes::copyStatus); + map.put("copySource", attributes::copySource); + map.put("copyProgress", attributes::copyProgress); + map.put("copyCompletionTime", attributes::copyCompletionTime); + map.put("copyStatusDescription", attributes::copyStatusDescription); + map.put("isServerEncrypted", attributes::isServerEncrypted); + map.put("accessTier", attributes::accessTier); + map.put("isAccessTierInferred", attributes::isAccessTierInferred); + map.put("archiveStatus", attributes::archiveStatus); + map.put("accessTierChangeTime", attributes::accessTierChangeTime); + map.put("metadata", attributes::metadata); + map.put("isRegularFile", attributes::isRegularFile); + map.put("isDirectory", attributes::isDirectory); + map.put("isVirtualDirectory", attributes::isVirtualDirectory); + map.put("isSymbolicLink", attributes::isSymbolicLink); + map.put("isOther", attributes::isOther); + map.put("size", attributes::size); + return map; + } + + /** + * Returns the creation time. The creation time is the time that the file was created. Returns null if this is a + * virtual directory. + * + * @return The creation time or null if this is a virtual directory + */ + @Override + public FileTime creationTime() { + return !this.isVirtualDirectory ? FileTime.from(this.properties.getCreationTime().toInstant()) : null; + } + + /** + * Returns the time of last modification. Returns null if this is a virtual directory + * + * @return the time of last modification or null if this is a virtual directory + */ + @Override + public FileTime lastModifiedTime() { + return !this.isVirtualDirectory ? 
FileTime.from(this.properties.getLastModified().toInstant()) : null; + } + + /** + * Returns the eTag of the blob or null if this is a virtual directory + * + * @return the eTag of the blob or null if this is a virtual directory + */ + public String eTag() { + return !this.isVirtualDirectory ? this.properties.getETag() : null; + } + + /** + * Returns the {@link BlobHttpHeaders} of the blob or null if this is a virtual directory. + * + * @return {@link BlobHttpHeaders} or null if this is a virtual directory + */ + public BlobHttpHeaders blobHttpHeaders() { + if (this.isVirtualDirectory) { + return null; + } + /* + We return these all as one value, so it's consistent with the way of setting, especially the setAttribute method + that accepts a string argument for the name of the property. Returning them individually would mean we have to + support setting them individually as well, which is not possible due to service constraints. + */ + return new BlobHttpHeaders() + .setContentType(this.properties.getContentType()) + .setContentLanguage(this.properties.getContentLanguage()) + .setContentMd5(this.properties.getContentMd5()) + .setContentDisposition(this.properties.getContentDisposition()) + .setContentEncoding(this.properties.getContentEncoding()) + .setCacheControl(this.properties.getCacheControl()); + } + + /** + * Returns the type of the blob or null if this is a virtual directory + * + * @return the type of the blob or null if this is a virtual directory + */ + public BlobType blobType() { + return !this.isVirtualDirectory ? this.properties.getBlobType() : null; + } + + /** + * Returns the identifier of the last copy operation. If this blob hasn't been the target of a copy operation or has + * been modified since this won't be set. Returns null if this is a virtual directory + * + * @return the identifier of the last copy operation or null if this is a virtual directory + */ + public String copyId() { + return !this.isVirtualDirectory ? this.properties.getCopyId() : null; + } + + /** + * Returns the status of the last copy operation. If this blob hasn't been the target of a copy operation or has + * been modified since this won't be set. Returns null if this is a virtual directory + * + * @return the status of the last copy operation or null if this is a virtual directory + */ + public CopyStatusType copyStatus() { + return !this.isVirtualDirectory ? this.properties.getCopyStatus() : null; + } + + /** + * Returns the source blob URL from the last copy operation. If this blob hasn't been the target of a copy operation + * or has been modified since this won't be set. Returns null if this is a virtual directory + * + * @return the source blob URL from the last copy operation or null if this is a virtual directory + */ + public String copySource() { + return !this.isVirtualDirectory ? this.properties.getCopySource() : null; + } + + /** + * Returns the number of bytes copied and total bytes in the source from the last copy operation (bytes copied/total + * bytes). If this blob hasn't been the target of a copy operation or has been modified since this won't be set. + * Returns null if this is a virtual directory + * + * @return the number of bytes copied and total bytes in the source from the last copy operation null if this is a + * virtual directory + */ + public String copyProgress() { + return !this.isVirtualDirectory ? this.properties.getCopyProgress() : null; + } + + /** + * Returns the completion time of the last copy operation. 
If this blob hasn't been the target of a copy operation + * or has been modified since this won't be set. Returns null if this is a virtual directory. + * + * @return the completion time of the last copy operation or null if this is a virtual directory + */ + public OffsetDateTime copyCompletionTime() { + return !this.isVirtualDirectory ? this.properties.getCopyCompletionTime() : null; + } + + /** + * Returns the description of the last copy failure, this is set when the {@link #copyStatus() getCopyStatus} is + * {@link CopyStatusType#FAILED failed} or {@link CopyStatusType#ABORTED aborted}. If this blob hasn't been the + * target of a copy operation or has been modified since this won't be set. Returns null if this is a virtual + * directory. + * + * @return the description of the last copy failure or null if this is a virtual directory + */ + public String copyStatusDescription() { + return !this.isVirtualDirectory ? this.properties.getCopyStatusDescription() : null; + } + + /** + * Returns the status of the blob being encrypted on the server or null if this is a virtual directory. + * + * @return the status of the blob being encrypted on the server or null if this is a virtual directory + */ + public Boolean isServerEncrypted() { + return !this.isVirtualDirectory ? this.properties.isServerEncrypted() : null; + } + + /** + * Returns the tier of the blob. This is only set for Page blobs on a premium storage account or for Block blobs on + * blob storage or general purpose V2 account. Returns null if this is a virtual directory. + * + * @return the tier of the blob or null if this is a virtual directory + */ + public AccessTier accessTier() { + return !this.isVirtualDirectory ? this.properties.getAccessTier() : null; + } + + /** + * Returns the status of the tier being inferred for the blob. This is only set for Page blobs on a premium storage + * account or for Block blobs on blob storage or general purpose V2 account. Returns null if this is a virtual + * directory. + * + * @return the status of the tier being inferred for the blob or null if this is a virtual directory + */ + public Boolean isAccessTierInferred() { + return !this.isVirtualDirectory ? this.properties.isAccessTierInferred() : null; + } + + /** + * Returns the archive status of the blob. This is only for blobs on a blob storage and general purpose v2 account. + * Returns null if this is a virtual directory. + * + * @return the archive status of the blob or null if this is a virtual directory + */ + public ArchiveStatus archiveStatus() { + return !this.isVirtualDirectory ? this.properties.getArchiveStatus() : null; + } + + /** + * Returns the time when the access tier for the blob was last changed or null if this is a virtual directory. + * + * @return the time when the access tier for the blob was last changed or null if this is a virtual directory + */ + public OffsetDateTime accessTierChangeTime() { + return !this.isVirtualDirectory ? this.properties.getAccessTierChangeTime() : null; + } + + /** + * Returns the metadata associated with this blob or null if this is a virtual directory. + * + * @return the metadata associated with this blob or null if this is a virtual directory + */ + public Map metadata() { + return !this.isVirtualDirectory ? Collections.unmodifiableMap(this.properties.getMetadata()) : null; + } + + /** + * Returns the time of last modification or null if this is a virtual directory. + *
<p>
+ * Last access time is not supported by the blob service. In this case, it is typical for implementations to return + * the {@link #lastModifiedTime()}. + * + * @return the time of last modification or null if this is a virtual directory + */ + @Override + public FileTime lastAccessTime() { + return !this.isVirtualDirectory ? FileTime.from(this.properties.getLastAccessedTime().toInstant()) : null; + } + + /** + * Tells whether the file is a regular file with opaque content. + * + * @return whether the file is a regular file. + */ + @Override + public boolean isRegularFile() { + return !this.isVirtualDirectory + && !this.properties.getMetadata().getOrDefault(AzureResource.DIR_METADATA_MARKER, "false").equals("true"); + } + + /** + * Tells whether the file is a directory. + *
<p>
+ * Will return true if the directory is a concrete or virtual directory. See + * {@link AzureFileSystemProvider#createDirectory(Path, FileAttribute[])} for more information on virtual and + * concrete directories. + * + * @return whether the file is a directory + */ + @Override + public boolean isDirectory() { + return !this.isRegularFile(); + } + + /** + * Tells whether the file is a virtual directory. + *

+ * See {@link AzureFileSystemProvider#createDirectory(Path, FileAttribute[])} for more information on virtual and + * concrete directories. + * + * @return whether the file is a virtual directory + */ + public boolean isVirtualDirectory() { + return this.isVirtualDirectory; + } + + /** + * Tells whether the file is a symbolic link. + * + * @return false. Symbolic links are not supported. + */ + @Override + public boolean isSymbolicLink() { + return false; + } + + /** + * Tells whether the file is something other than a regular file, directory, or symbolic link. + * + * @return false. No other object types are supported. + */ + @Override + public boolean isOther() { + return false; + } + + /** + * Returns the size of the file (in bytes). + * + * @return the size of the file + */ + @Override + public long size() { + return !this.isVirtualDirectory ? properties.getBlobSize() : 0; + } + + /** + * Returns the url of the resource. + * + * @return The file key, which is the url. + */ + @Override + public Object fileKey() { + return resource.getBlobClient().getBlobUrl(); + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureDirectoryStream.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureDirectoryStream.java new file mode 100644 index 00000000000..817121e958e --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureDirectoryStream.java @@ -0,0 +1,189 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import java.io.IOException; +import java.nio.file.DirectoryIteratorException; +import java.nio.file.DirectoryStream; +import java.nio.file.Path; +import java.util.HashSet; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Set; + +import com.azure.core.util.logging.ClientLogger; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.models.BlobItem; +import com.azure.storage.blob.models.BlobListDetails; +import com.azure.storage.blob.models.ListBlobsOptions; + +/** + * A type for iterating over the contents of a directory. + * + * This type is asynchronously closeable, i.e. closing the stream from any thread will cause the stream to stop + * returning elements at that point. 
+ * + * {@inheritDoc} + */ +public final class AzureDirectoryStream implements DirectoryStream { + private static final ClientLogger LOGGER = new ClientLogger(AzureDirectoryStream.class); + + private final AzurePath path; + private final DirectoryStream.Filter filter; + private boolean iteratorRequested = false; + private final AzureDirectoryIterator iterator; + boolean closed = false; + + AzureDirectoryStream(AzurePath path, DirectoryStream.Filter filter) throws IOException { + this.path = path; + this.filter = filter; + this.iterator = new AzureDirectoryIterator(this, this.path, this.filter); + } + + @Override + public Iterator iterator() { + if (this.iteratorRequested) { + throw LoggingUtility.logError(LOGGER, + new IllegalStateException("Only one iterator may be requested from a given directory stream")); + } + this.iteratorRequested = true; + return this.iterator; + } + + @Override + public void close() throws IOException { + this.closed = true; + } + + private static class AzureDirectoryIterator implements Iterator { + private static final ClientLogger LOGGER = new ClientLogger(AzureDirectoryIterator.class); + + private final AzureDirectoryStream parentStream; + private final DirectoryStream.Filter filter; + private final Iterator blobIterator; + private final AzurePath path; + private final Path withoutRoot; + private Path bufferedNext = null; + private final Set directoryPaths; + + AzureDirectoryIterator(AzureDirectoryStream parentStream, AzurePath path, + DirectoryStream.Filter filter) throws IOException { + this.parentStream = parentStream; + this.filter = filter; + this.path = path; + + /* + Resolving two paths requires that either both have a root or neither does. Because the paths returned from + listing will never have a root, we prepare a copy of the list path without a root for quick resolving later. + */ + Path root = this.path.getRoot(); + this.withoutRoot = root == null ? this.path : root.relativize(this.path); + + directoryPaths = new HashSet<>(); + + BlobContainerClient containerClient; + ListBlobsOptions listOptions = new ListBlobsOptions() + .setDetails(new BlobListDetails().setRetrieveMetadata(true)); + if (path.isRoot()) { + String containerName = path.toString().substring(0, path.toString().length() - 1); + AzureFileSystem afs = ((AzureFileSystem) path.getFileSystem()); + containerClient = ((AzureFileStore) afs.getFileStore()).getContainerClient(); + } else { + AzureResource azureResource = new AzureResource(path); + listOptions.setPrefix(azureResource.getBlobClient().getBlobName() + AzureFileSystem.PATH_SEPARATOR); + containerClient = azureResource.getContainerClient(); + } + this.blobIterator = containerClient + .listBlobsByHierarchy(AzureFileSystem.PATH_SEPARATOR, listOptions, null).iterator(); + } + + @Override + public boolean hasNext() { + AzurePath.ensureFileSystemOpen(path); + + // Closing the parent stream halts iteration. + if (parentStream.closed) { + return false; + } + + // In case a customer calls hasNext multiple times in a row. If we've buffered an element, we have a next. + if (this.bufferedNext != null) { + return true; + } + + /* + Search for a new element that passes the filter and buffer it when found. If no such element is found, + return false. 
+ */ + while (this.blobIterator.hasNext()) { + BlobItem nextBlob = this.blobIterator.next(); + Path nextPath = getNextListResult(nextBlob); + try { + if (this.filter.accept(nextPath) && isNotDuplicate(nextPath, nextBlob)) { + this.bufferedNext = nextPath; + return true; + } + } catch (IOException e) { + throw LoggingUtility.logError(LOGGER, new DirectoryIteratorException(e)); + } + } + return false; + } + + @Override + public Path next() { + if (this.bufferedNext == null) { + if (!this.hasNext()) { // This will populate bufferedNext in the process. + throw LoggingUtility.logError(LOGGER, new NoSuchElementException()); + } + } + Path next = this.bufferedNext; // bufferedNext will have been populated by hasNext() + this.bufferedNext = null; + return next; + } + + @Override + public void remove() { + throw LoggingUtility.logError(LOGGER, new UnsupportedOperationException()); + } + + private Path getNextListResult(BlobItem blobItem) { + /* + Listing results return the full blob path, and we don't want to duplicate the path we listed off of, so + we relativize to remove it. + */ + String blobName = blobItem.getName(); + Path relativeResult = this.withoutRoot.relativize( + this.path.getFileSystem().getPath(blobName)); + + // Resolve the cleaned list result against the original path for the final result. + return this.path.resolve(relativeResult); + } + + /* + If there is a concrete directory with children, a given path will be returned twice: once as the marker blob + and once as the prefix for its children. We don't want to return the item twice, and we have no guarantees on + result ordering, so we have to maintain a cache of directory paths we've seen in order to de-dup. + */ + private boolean isNotDuplicate(Path path, BlobItem blob) { + /* + If the blob is not a prefix and the blob does not contain the directory metadata marker, it is a normal blob + and therefore will not be duplicated. + */ + if (!(blob.isPrefix() != null && blob.isPrefix()) + && !(blob.getMetadata() != null && blob.getMetadata().containsKey(AzureResource.DIR_METADATA_MARKER))) { + return true; + } + + // If the set contains this path, it means we've seen it before and we shouldn't return it again. + if (this.directoryPaths.contains(path.toString())) { + return false; + } + + // We haven't seen this before. Track it and indicate it should be returned. + this.directoryPaths.add(path.toString()); + return true; + } + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureFileStore.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureFileStore.java new file mode 100644 index 00000000000..bbe361864bc --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureFileStore.java @@ -0,0 +1,194 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import com.azure.core.util.logging.ClientLogger; +import com.azure.storage.blob.BlobContainerClient; + +import java.io.IOException; +import java.nio.file.FileStore; +import java.nio.file.attribute.FileAttributeView; +import java.nio.file.attribute.FileStoreAttributeView; +import java.util.Objects; + +/** + * An {@code AzureFileStore} is a {@link FileStore} backed by an Azure Blob Storage container. 
+ */ +public final class AzureFileStore extends FileStore { + private static final ClientLogger LOGGER = new ClientLogger(AzureFileStore.class); + + private static final String AZURE_FILE_STORE_TYPE = "AzureBlobContainer"; + + private final AzureFileSystem parentFileSystem; + private final BlobContainerClient containerClient; + + + AzureFileStore(AzureFileSystem parentFileSystem, String containerName, Boolean skipConnectionCheck) + throws IOException { + // A FileStore should only ever be created by a FileSystem. + if (Objects.isNull(parentFileSystem)) { + throw LoggingUtility.logError(LOGGER, new IllegalStateException("AzureFileStore cannot be instantiated " + + "without a parent FileSystem")); + } + this.parentFileSystem = parentFileSystem; + this.containerClient = this.parentFileSystem.getBlobServiceClient().getBlobContainerClient(containerName); + + if (skipConnectionCheck == null || !skipConnectionCheck) { + try { + // This also serves as our connection check. + if (!this.containerClient.exists()) { + this.containerClient.create(); + } + } catch (Exception e) { + throw LoggingUtility.logError(LOGGER, new IOException("There was an error in establishing the existence of " + + "container: " + containerName, e)); + } + } + } + + /** + * Returns the name of the container that underlies this file store. + * + * @return the name of the container that underlies this file store. + */ + @Override + public String name() { + return this.containerClient.getBlobContainerName(); + } + + /** + * Returns the {@code String "AzureBlobContainer"} to indicate that the file store is backed by a remote blob + * container in Azure Storage. + * + * @return {@code "AzureBlobContainer"} + */ + @Override + public String type() { + return AZURE_FILE_STORE_TYPE; + } + + /** + * Always returns false. + *

+ * It may be the case that the authentication method provided to this file system only + * supports read operations and hence the file store is implicitly read only in this view, but that does not + * imply the underlying container/file store is inherently read only. Creating/specifying read only file stores + * is not currently supported. + * + * @return false. + */ + @Override + public boolean isReadOnly() { + return false; + } + + /** + * Returns the size, in bytes, of the file store. + *

+ * Containers do not limit the amount of data stored. This method will always return max long. + * + * @return the size of the file store. + * @throws IOException If an I/O error occurs. + */ + @Override + public long getTotalSpace() throws IOException { + return Long.MAX_VALUE; + } + + /** + * Returns the number of bytes available to this Java virtual machine on the file store. + *

+ * Containers do not limit the amount of data stored. This method will always return max long. + * + * @return the number of bytes available on the file store. + * @throws IOException If an I/O error occurs. + */ + @Override + public long getUsableSpace() throws IOException { + return Long.MAX_VALUE; + } + + /** + * Returns the number of unallocated bytes in the file store. + *

+ * Containers do not limit the amount of data stored. This method will always return max long. + * + * @return the number of unallocated bytes in the file store. + * @throws IOException If an I/O error occurs. + */ + @Override + public long getUnallocatedSpace() throws IOException { + return Long.MAX_VALUE; + } + + /** + * Tells whether this file store supports the file attributes identified by the given file attribute view. + *

+ * All file stores in this file system support the following views: {@code basic}, {@code azureBasic}, and {@code azureBlob}. + *

+ * + * @param type the file attribute view type + * @return Whether the file attribute view is supported. + */ + @Override + public boolean supportsFileAttributeView(Class type) { + return AzureFileSystem.SUPPORTED_ATTRIBUTE_VIEWS.containsKey(type); + } + + /** + * Tells whether this file store supports the file attributes identified by the given file attribute view. + *
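As a quick illustration of the view support just described, here is a minimal sketch that inspects the file stores of an already-open azb file system through the standard java.nio.file API (how the file system itself is opened is covered in the AzureFileSystemProvider docs further down):

import java.nio.file.FileStore;
import java.nio.file.FileSystem;
import java.nio.file.attribute.BasicFileAttributeView;

// Minimal sketch: query the capabilities of each file store (container) of an open azb file system.
final class FileStoreInfoSketch {
    static void print(FileSystem fs) {
        for (FileStore store : fs.getFileStores()) {
            System.out.println(store.name() + " (" + store.type() + ")");        // container name, "AzureBlobContainer"
            System.out.println("  read-only: " + store.isReadOnly());            // always false
            System.out.println("  basic view: " + store.supportsFileAttributeView(BasicFileAttributeView.class));
            System.out.println("  azureBlob view: " + store.supportsFileAttributeView("azureBlob"));
        }
    }
}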

+ * All file stores in this file system support the following views: {@code basic}, {@code azureBasic}, and {@code azureBlob}. + *

+ * + * @param name the name of the file attribute view + * @return whether the file attribute view is supported. + */ + @Override + public boolean supportsFileAttributeView(String name) { + return AzureFileSystem.SUPPORTED_ATTRIBUTE_VIEWS.containsValue(name); + } + + /** + * Returns a FileStoreAttributeView of the given type. + *

+ * This method always returns null as no {@link FileStoreAttributeView} is currently supported. + * + * @param aClass a class + * @return null + */ + @Override + public V getFileStoreAttributeView(Class aClass) { + return null; + } + + /** + * Unsupported. + *

+ * This method always throws an {@code UnsupportedOperationException} as no {@link FileStoreAttributeView} is + * currently supported. + * + * @param s a string + * @return The attribute value. + * @throws UnsupportedOperationException unsupported + * @throws IOException never + */ + @Override + public Object getAttribute(String s) throws IOException { + throw LoggingUtility.logError(LOGGER, new UnsupportedOperationException("FileStoreAttributeViews aren't" + + " supported.")); + } + + BlobContainerClient getContainerClient() { + return this.containerClient; + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureFileSystem.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureFileSystem.java new file mode 100644 index 00000000000..8ca4361bd3e --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureFileSystem.java @@ -0,0 +1,534 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import java.io.IOException; +import java.nio.file.FileStore; +import java.nio.file.FileSystem; +import java.nio.file.InvalidPathException; +import java.nio.file.Path; +import java.nio.file.PathMatcher; +import java.nio.file.WatchService; +import java.nio.file.attribute.BasicFileAttributeView; +import java.nio.file.attribute.FileAttributeView; +import java.nio.file.attribute.UserPrincipalLookupService; +import java.nio.file.spi.FileSystemProvider; +import java.time.Duration; +import java.time.Instant; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.regex.PatternSyntaxException; + +import com.azure.core.credential.AzureSasCredential; +import com.azure.core.http.HttpClient; +import com.azure.core.http.policy.HttpLogDetailLevel; +import com.azure.core.http.policy.HttpPipelinePolicy; +import com.azure.core.util.CoreUtils; +import com.azure.core.util.logging.ClientLogger; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.azure.storage.blob.implementation.util.BlobUserAgentModificationPolicy; +import com.azure.storage.common.StorageSharedKeyCredential; +import com.azure.storage.common.policy.RequestRetryOptions; +import com.azure.storage.common.policy.RetryPolicyType; + +/** + * Implement's Java's {@link FileSystem} interface for Azure Blob Storage. + *

+ * The following behavior is specific to this FileSystem: + *

+ * In the hierarchy of this file system, an {@code AzureFileSystem} corresponds to an Azure Blob Storage account. A + * file store is represented by a container in the storage account. Each container has one root directory. + *

+ * Closing the file system will not block on outstanding operations. Any operations in progress will be allowed to + * terminate naturally after the file system is closed, though no further operations may be started after the parent + * file system is closed. + *

+ * All instances of {@code AzureFileSystem} are opened for read-write access. + *

+ * For a more complete description of the uses for the constants described here, please see the instructions for opening + * and configuring a FileSystem in the docs of {@link FileSystemProvider}. + */ +public final class AzureFileSystem extends FileSystem { + private static final ClientLogger LOGGER = new ClientLogger(AzureFileSystem.class); + + // Configuration constants for blob clients. + /** + * Expected type: String + */ + public static final String AZURE_STORAGE_SHARED_KEY_CREDENTIAL = "AzureStorageSharedKeyCredential"; + + /** + * Expected type: String + */ + public static final String AZURE_STORAGE_SAS_TOKEN_CREDENTIAL = "AzureStorageSasTokenCredential"; + + /** + * Expected type: String + */ + public static final String AZURE_STORAGE_PUBLIC_ACCESS_CREDENTIAL = "AzureStoragePublicAccessCredential"; + + /** + * Expected type: com.azure.core.http.policy.HttpLogLevelDetail + */ + public static final String AZURE_STORAGE_HTTP_LOG_DETAIL_LEVEL = "AzureStorageHttpLogDetailLevel"; + + /** + * Expected type: Integer + */ + public static final String AZURE_STORAGE_MAX_TRIES = "AzureStorageMaxTries"; + + /** + * Expected type: Integer + */ + public static final String AZURE_STORAGE_TRY_TIMEOUT = "AzureStorageTryTimeout"; + + /** + * Expected type: Long + */ + public static final String AZURE_STORAGE_RETRY_DELAY_IN_MS = "AzureStorageRetryDelayInMs"; + + /** + * Expected type: Long + */ + public static final String AZURE_STORAGE_MAX_RETRY_DELAY_IN_MS = "AzureStorageMaxRetryDelayInMs"; + + /** + * Expected type: com.azure.storage.common.policy.RetryPolicyType + */ + public static final String AZURE_STORAGE_RETRY_POLICY_TYPE = "AzureStorageRetryPolicyType"; + + /** + * Expected type: String + */ + public static final String AZURE_STORAGE_SECONDARY_HOST = "AzureStorageSecondaryHost"; + + /** + * Expected type: Long + */ + public static final String AZURE_STORAGE_UPLOAD_BLOCK_SIZE = "AzureStorageUploadBlockSize"; + + /** + * Expected type: Integer + */ + public static final String AZURE_STORAGE_MAX_CONCURRENCY_PER_REQUEST = "AzureStorageMaxConcurrencyPerRequest"; + + /** + * Expected type: Long + */ + public static final String AZURE_STORAGE_PUT_BLOB_THRESHOLD = "AzureStoragePutBlobThreshold"; + + /** + * Expected type: Integer + */ + public static final String AZURE_STORAGE_DOWNLOAD_RESUME_RETRIES = "AzureStorageDownloadResumeRetries"; + + static final String AZURE_STORAGE_HTTP_CLIENT = "AzureStorageHttpClient"; // undocumented; for test. + static final String AZURE_STORAGE_HTTP_POLICIES = "AzureStorageHttpPolicies"; // undocumented; for test. 
+ + /** + * Expected type: String + */ + public static final String AZURE_STORAGE_FILE_STORES = "AzureStorageFileStores"; + + /** + * Expected type: Boolean + */ + public static final String AZURE_STORAGE_SKIP_INITIAL_CONTAINER_CHECK = "AzureStorageSkipInitialContainerCheck"; + + static final String PATH_SEPARATOR = "/"; + + private static final Map PROPERTIES = + CoreUtils.getProperties("azure-storage-blob-nio.properties"); + private static final String SDK_NAME = "name"; + private static final String SDK_VERSION = "version"; + private static final String CLIENT_NAME = PROPERTIES.getOrDefault(SDK_NAME, "UnknownName"); + private static final String CLIENT_VERSION = PROPERTIES.getOrDefault(SDK_VERSION, "UnknownVersion"); + + static final Map, String> SUPPORTED_ATTRIBUTE_VIEWS; + static { + Map, String> map = new HashMap<>(); + map.put(BasicFileAttributeView.class, "basic"); + map.put(AzureBasicFileAttributeView.class, "azureBasic"); + map.put(AzureBlobFileAttributeView.class, "azureBlob"); + SUPPORTED_ATTRIBUTE_VIEWS = Collections.unmodifiableMap(map); + } + + private final AzureFileSystemProvider parentFileSystemProvider; + private final BlobServiceClient blobServiceClient; + private final Long blockSize; + private final Long putBlobThreshold; + private final Integer maxConcurrencyPerRequest; + private final Integer downloadResumeRetries; + private FileStore defaultFileStore; + private boolean closed; + + private AzureSasCredential currentActiveSasCredential; + private Instant expiry; + + AzureFileSystem(AzureFileSystemProvider parentFileSystemProvider, String endpoint, Map config) + throws IOException { + // A FileSystem should only ever be instantiated by a provider. + if (Objects.isNull(parentFileSystemProvider)) { + throw LoggingUtility.logError(LOGGER, new IllegalArgumentException("AzureFileSystem cannot be instantiated" + + " without a parent FileSystemProvider")); + } + this.parentFileSystemProvider = parentFileSystemProvider; + + // Read configurations and build client. + try { + this.blobServiceClient = this.buildBlobServiceClient(endpoint, config); + this.blockSize = (Long) config.get(AZURE_STORAGE_UPLOAD_BLOCK_SIZE); + this.putBlobThreshold = (Long) config.get(AZURE_STORAGE_PUT_BLOB_THRESHOLD); + this.maxConcurrencyPerRequest = (Integer) config.get(AZURE_STORAGE_MAX_CONCURRENCY_PER_REQUEST); + this.downloadResumeRetries = (Integer) config.get(AZURE_STORAGE_DOWNLOAD_RESUME_RETRIES); + this.currentActiveSasCredential = (AzureSasCredential) config.get(AZURE_STORAGE_SAS_TOKEN_CREDENTIAL); + + // Initialize and ensure access to FileStores. + this.defaultFileStore = this.initializeFileStore(config); + } catch (RuntimeException e) { + throw LoggingUtility.logError(LOGGER, new IllegalArgumentException("There was an error parsing the " + + "configurations map. Please ensure all fields are set to a legal value of the correct type.", e)); + } catch (IOException e) { + throw LoggingUtility.logError(LOGGER, + new IOException("Initializing FileStores failed. FileSystem could not be opened.", e)); + } + + this.closed = false; + } + + /** + * Returns the provider that created this file system. + * + * @return the provider that created this file system. + */ + @Override + public FileSystemProvider provider() { + return this.parentFileSystemProvider; + } + + /** + * Closes this file system. + *

+ * After a file system is closed then all subsequent access to the file system, either by methods defined by this + * class or on objects associated with this file system, throw ClosedFileSystemException. If the file system is + * already closed then invoking this method has no effect. + *

+ * Closing the file system will not block on outstanding operations. Any operations in progress will be allowed to + * terminate naturally after the file system is closed, though no further operations may be started after the + * parent file system is closed. + *

+ * Once closed, a file system with the same identifier as the one closed may be re-opened. + * + * @throws IOException If an I/O error occurs. + */ + @Override + public void close() throws IOException { + this.closed = true; + this.parentFileSystemProvider.closeFileSystem(this.getFileSystemUrl() + "/" + defaultFileStore.name()); + } + + /** + * Tells whether this file system is open. + * + * @return whether this file system is open. + */ + @Override + public boolean isOpen() { + return !this.closed; + } + + /** + * Tells whether this file system allows only read-only access to its file stores. + *

+ * Always returns false. It may be the case that the authentication method provided to this file system only + * supports read operations and hence the file system is implicitly read only in this view, but that does not + * imply the underlying account/file system is inherently read only. Creating/specifying read only file + * systems is not supported. + * + * @return false + */ + @Override + public boolean isReadOnly() { + return false; + } + + /** + * Returns the name separator, represented as a string. + *

+ * The separator used in this file system is {@code "/"}. + * + * @return "/" + */ + @Override + public String getSeparator() { + return AzureFileSystem.PATH_SEPARATOR; + } + + /** + * Returns an object to iterate over the paths of the root directories. + *

+ * The list of root directories corresponds to the list of available file stores and therefore containers specified + * upon initialization. A root directory always takes the form {@code "<file store name>:"}. This list will + * respect the parameters provided during initialization. + *

+ * If a finite list of containers was provided on start up, this list will not change during the lifetime of this + * object. If containers are added to the account after initialization, they will be ignored. If a container is + * deleted or otherwise becomes unavailable, its root directory will still be returned but operations to it will + * fail. + * + * @return an object to iterate over the paths of the root directories + */ + @Override + public Iterable getRootDirectories() { + /* + Should we add different initialization options later: + If the file system was set to use all containers in the account, the account will be re-queried and the + list may grow or shrink if containers were added or deleted. + */ + return Arrays.asList(this.getPath(defaultFileStore.name() + AzurePath.ROOT_DIR_SUFFIX)); + } + + /** + * Returns an object to iterate over the underlying file stores + *

+ * This list will respect the parameters provided during initialization. + *

+ * If a finite list of containers was provided on start up, this list will not change during the lifetime of this + * object. If containers are added to the account after initialization, they will be ignored. If a container is + * deleted or otherwise becomes unavailable, its root directory will still be returned but operations to it will + * fail. + */ + @Override + public Iterable getFileStores() { + /* + Should we add different initialization options later: + If the file system was set to use all containers in the account, the account will be re-queried and the + list may grow or shrink if containers were added or deleted. + */ + return Arrays.asList(defaultFileStore); + } + + /** + * Returns the set of the names of the file attribute views supported by this FileSystem. + *

+ * This file system supports the following views: {@code basic}, {@code azureBasic}, and {@code azureBlob}. + *

+ */ + @Override + public Set supportedFileAttributeViews() { + return new HashSet<>(SUPPORTED_ATTRIBUTE_VIEWS.values()); + } + + /** + * Converts a path string, or a sequence of more that when joined form a path string, to a Path. + *

+ * If more does not specify any elements then the value of the first parameter is the path string to convert. If + * more specifies one or more elements then each non-empty string, including first, is considered to be a sequence + * of name elements (see Path) and is joined to form a path string. The elements of more will be joined using the name + * separator. + *

+ * Each name element will be {@code String}-joined to the other elements by this file system's first path separator. + * Naming conventions and allowed characters are as + * defined + * by the Azure Blob Storage service. The root component is interpreted as the container name and all name elements + * are interpreted as a part of the blob name. The character {@code ':'} is only allowed in the root component and + * must be the last character of the root component. + * + * @param first the path string or initial part of the path string + * @param more additional strings to be joined to form the path string + * @throws InvalidPathException if the path string cannot be converted. + */ + @Override + public Path getPath(String first, String... more) { + return new AzurePath(this, first, more); + } + + /** + * Unsupported. + * + * @param s the matcher + * @throws UnsupportedOperationException unsupported. + * @throws IllegalArgumentException never + * @throws PatternSyntaxException never + */ + @Override + public PathMatcher getPathMatcher(String s) throws IllegalArgumentException, PatternSyntaxException { + throw LoggingUtility.logError(LOGGER, new UnsupportedOperationException()); + } + + /** + * Unsupported. + * + * @throws UnsupportedOperationException unsupported. + */ + @Override + public UserPrincipalLookupService getUserPrincipalLookupService() { + throw LoggingUtility.logError(LOGGER, new UnsupportedOperationException()); + } + + /** + * Unsupported. + * + * @throws UnsupportedOperationException unsupported. + * @throws IOException Never thrown. + */ + @Override + public WatchService newWatchService() throws IOException { + throw LoggingUtility.logError(LOGGER, new UnsupportedOperationException()); + } + + String getFileSystemUrl() { + return this.blobServiceClient.getAccountUrl(); + } + + BlobServiceClient getBlobServiceClient() { + return this.blobServiceClient; + } + + private BlobServiceClient buildBlobServiceClient(String endpoint, Map config) { + BlobServiceClientBuilder builder = new BlobServiceClientBuilder() + .endpoint(endpoint); + + // Set the credentials. + if (config.containsKey(AZURE_STORAGE_SHARED_KEY_CREDENTIAL)) { + builder.credential((StorageSharedKeyCredential) config.get(AZURE_STORAGE_SHARED_KEY_CREDENTIAL)); + } else if (config.containsKey(AZURE_STORAGE_SAS_TOKEN_CREDENTIAL)) { + builder.credential((AzureSasCredential) config.get(AZURE_STORAGE_SAS_TOKEN_CREDENTIAL)); + this.setExpiryFromSAS((AzureSasCredential) config.get(AZURE_STORAGE_SAS_TOKEN_CREDENTIAL)); + } else if (config.containsKey(AZURE_STORAGE_PUBLIC_ACCESS_CREDENTIAL)) { + // The Blob Service Client Builder requires at least one kind of authentication to make requests + // For public files however, this is unnecessary. This key-value pair is to denote the case + // explicitly when we supply a placeholder SAS credential to bypass this requirement. + builder.credential((AzureSasCredential) config.get(AZURE_STORAGE_PUBLIC_ACCESS_CREDENTIAL)); + } else { + throw LoggingUtility.logError(LOGGER, new IllegalArgumentException(String.format("No credentials were " + + "provided. Please specify one of the following when constructing an AzureFileSystem: %s, %s.", + AZURE_STORAGE_SHARED_KEY_CREDENTIAL, AZURE_STORAGE_SAS_TOKEN_CREDENTIAL))); + } + + // Configure options and client. 
+ builder.httpLogOptions(BlobServiceClientBuilder.getDefaultHttpLogOptions() + .setLogLevel((HttpLogDetailLevel) config.get(AZURE_STORAGE_HTTP_LOG_DETAIL_LEVEL))); + + RequestRetryOptions retryOptions = new RequestRetryOptions( + (RetryPolicyType) config.get(AZURE_STORAGE_RETRY_POLICY_TYPE), + (Integer) config.get(AZURE_STORAGE_MAX_TRIES), + (Integer) config.get(AZURE_STORAGE_TRY_TIMEOUT), + (Long) config.get(AZURE_STORAGE_RETRY_DELAY_IN_MS), + (Long) config.get(AZURE_STORAGE_MAX_RETRY_DELAY_IN_MS), + (String) config.get(AZURE_STORAGE_SECONDARY_HOST)); + builder.retryOptions(retryOptions); + + builder.httpClient((HttpClient) config.get(AZURE_STORAGE_HTTP_CLIENT)); + + // Add BlobUserAgentModificationPolicy + builder.addPolicy(new BlobUserAgentModificationPolicy(CLIENT_NAME, CLIENT_VERSION)); + + if (config.containsKey(AZURE_STORAGE_HTTP_POLICIES)) { + for (HttpPipelinePolicy policy : (HttpPipelinePolicy[]) config.get(AZURE_STORAGE_HTTP_POLICIES)) { + builder.addPolicy(policy); + } + } + + return builder.buildClient(); + } + + private FileStore initializeFileStore(Map config) throws IOException { + String fileStoreName = (String) config.get(AZURE_STORAGE_FILE_STORES); + if (CoreUtils.isNullOrEmpty(fileStoreName)) { + throw LoggingUtility.logError(LOGGER, new IllegalArgumentException("The list of FileStores cannot be " + + "null.")); + } + + Boolean skipConnectionCheck = (Boolean) config.get(AZURE_STORAGE_SKIP_INITIAL_CONTAINER_CHECK); + Map fileStores = new HashMap<>(); + this.defaultFileStore = new AzureFileStore(this, fileStoreName, skipConnectionCheck); + return this.defaultFileStore; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + AzureFileSystem that = (AzureFileSystem) o; + return Objects.equals(this.getFileSystemUrl(), that.getFileSystemUrl()); + } + + @Override + public int hashCode() { + return Objects.hash(this.getFileSystemUrl()); + } + + Path getDefaultDirectory() { + return this.getPath(this.defaultFileStore.name() + AzurePath.ROOT_DIR_SUFFIX); + } + + FileStore getFileStore() throws IOException { + if (this.defaultFileStore == null) { + throw LoggingUtility.logError(LOGGER, new IOException("FileStore not initialized")); + } + return defaultFileStore; + } + + Long getBlockSize() { + return this.blockSize; + } + + Long getPutBlobThreshold() { + return this.putBlobThreshold; + } + + Integer getMaxConcurrencyPerRequest() { + return this.maxConcurrencyPerRequest; + } + + public String createSASAppendedURL(String url) throws IllegalStateException { + if (Objects.isNull(currentActiveSasCredential)) { + throw new IllegalStateException("No current active SAS credential present"); + } + return url + "?" + currentActiveSasCredential.getSignature(); + } + + public Optional getExpiry() { + return Optional.ofNullable(expiry); + } + + private void setExpiryFromSAS(AzureSasCredential token) { + List strings = Arrays.asList(token.getSignature().split("&")); + Optional expiryString = strings.stream() + .filter(s -> s.startsWith("se")) + .findFirst() + .map(s -> s.replaceFirst("se=","")) + .map(s -> s.replace("%3A", ":")); + this.expiry = expiryString.map(es -> Instant.parse(es)).orElse(null); + } + + /** + * Return true if this filesystem has SAS credentials with an expiration data attached, and we're within + * `buffer` of the expiration. Return false if our credentials don't come with an expiration, or we + * aren't within `buffer` of our expiration. 
+ */ + public boolean isExpired(Duration buffer) { + return Optional.ofNullable(this.expiry) + .map(e -> Instant.now().plus(buffer).isAfter(e)) + .orElse(false); + + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureFileSystemProvider.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureFileSystemProvider.java new file mode 100644 index 00000000000..2066acf89d5 --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureFileSystemProvider.java @@ -0,0 +1,1197 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UncheckedIOException; +import java.net.HttpURLConnection; +import java.net.URI; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.AccessDeniedException; +import java.nio.file.AccessMode; +import java.nio.file.CopyOption; +import java.nio.file.DirectoryNotEmptyException; +import java.nio.file.DirectoryStream; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.FileStore; +import java.nio.file.FileSystem; +import java.nio.file.FileSystemAlreadyExistsException; +import java.nio.file.FileSystemNotFoundException; +import java.nio.file.Files; +import java.nio.file.LinkOption; +import java.nio.file.NoSuchFileException; +import java.nio.file.NotDirectoryException; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.BasicFileAttributeView; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.FileAttributeView; +import java.nio.file.spi.FileSystemProvider; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.function.Consumer; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import com.azure.core.util.CoreUtils; +import com.azure.core.util.logging.ClientLogger; +import com.azure.core.util.polling.SyncPoller; +import com.azure.storage.blob.models.BlobCopyInfo; +import com.azure.storage.blob.models.BlobErrorCode; +import com.azure.storage.blob.models.BlobRequestConditions; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.models.ParallelTransferOptions; + +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +/** + * The {@code AzureFileSystemProvider} is Azure Storage's implementation of the nio interface on top of Azure Blob + * Storage. + *

+ * Particular care should be taken when working with a remote storage service. This implementation makes no guarantees + * on behavior or state should other processes operate on the same data concurrently; file systems from this provider + * will assume they have exclusive access to their data and will behave without regard for potential of interfering + * applications. Moreover, remote file stores introduce higher latencies. Therefore, additional consideration should be + * given to managing concurrency: race conditions are more likely to manifest and network failures occur more frequently + * than disk failures. These and other such distributed application scenarios must be considered when working with this + * file system. While the {@code AzureFileSystem} will ensure it takes appropriate steps towards robustness and + * reliability, the application developer must design around these failure scenarios and have fallback and retry options + * available. + *

+ * The Azure Blob Storage service backing these APIs is not a true FileSystem, nor is it the goal of this implementation + * to force Azure Blob Storage to act like a full-fledged file system. Some APIs and scenarios will remain unsupported + * indefinitely until they may be sensibly implemented. Other APIs may experience lower performance than is expected + * because of the number of network requests needed to ensure correctness. The javadocs for each type and method should + * also be read carefully to understand what guarantees are made and how they may differ from the contract defined by + * {@link FileSystemProvider}. + *

+ * The scheme for this provider is {@code "azb"}, and the format of the URI to identify an {@code AzureFileSystem} is + * {@code "azb://?endpoint=<endpoint>"}. The endpoint of the Storage account is used to uniquely identify the + * filesystem. + *

+ * An {@link AzureFileSystem} is backed by an account. An {@link AzureFileStore} is backed by a container. Any number of + * containers may be specified as file stores upon creation of the file system. When a file system is created, + * it will try to retrieve the properties of each container to ensure connection to the account. If any of the + * containers does not exist, it will be created. Failure to access or create containers as necessary will result in + * an exception and failure to create the file system. Any data existing in the containers will be preserved and + * accessible via the file system, though customers should be aware that it must be in a format understandable by + * the types in this package or behavior will be undefined. + *

+ * {@link #newFileSystem(URI, Map)} will check for the following keys in the configuration map and expect the named + * types. Any entries not listed here will be ignored. Note that {@link AzureFileSystem} has public constants defined + * for each of the keys for convenience. Most values are documented in the blob package. Any values which are unique to + * nio will be documented here. + *

+ * The recognized keys are the {@code AZURE_STORAGE_*} constants defined on {@link AzureFileSystem}; the expected + * value type for each key is documented on the corresponding constant. + *

+ * Either an account key or a sas token must be specified. If both are provided, the account key will be preferred. If + * a sas token is specified, the customer must take care that it has appropriate permissions to perform the actions + * demanded of the file system in a given workflow, including the initial connection check specified above. The same + * token will be applied to all operations. + *
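A minimal sketch of opening a file system with a SAS token under the configuration rules described above; the endpoint, token, and container name are placeholders, and the SAS entry is supplied as an AzureSasCredential to match the cast performed later in buildBlobServiceClient:

import java.io.IOException;
import java.net.URI;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;

import com.azure.core.credential.AzureSasCredential;
import com.azure.storage.blob.nio.AzureFileSystem;

// Minimal sketch: open an azb file system against a single container using a SAS token.
public final class OpenAzbFileSystemSketch {
    public static void main(String[] args) throws IOException {
        String endpoint = "https://myaccount.blob.core.windows.net"; // placeholder
        String sasToken = "sv=...";                                  // placeholder
        String container = "mycontainer";                            // placeholder

        Map<String, Object> config = new HashMap<>();
        // buildBlobServiceClient casts this entry to AzureSasCredential.
        config.put(AzureFileSystem.AZURE_STORAGE_SAS_TOKEN_CREDENTIAL, new AzureSasCredential(sasToken));
        // initializeFileStore reads this entry as the name of the single backing container.
        config.put(AzureFileSystem.AZURE_STORAGE_FILE_STORES, container);

        try (FileSystem fs = FileSystems.newFileSystem(URI.create("azb://?endpoint=" + endpoint), config)) {
            for (Path root : fs.getRootDirectories()) {
                System.out.println("root: " + root);
            }
        }
    }
}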

+ * An iterable of file stores must also be provided; each entry should simply be the name of a container. The first + * container listed will be considered the default file store and the root directory of which will be the file system's + * default directory. All other values listed are used to configure the underlying + * {@link com.azure.storage.blob.BlobServiceClient}. Please refer to that type for more information on these values. + * + * @see FileSystemProvider + */ +public final class AzureFileSystemProvider extends FileSystemProvider { + /* + * A static inner class is used to hold the ClientLogger for AzureFileSystemProvider to defer creating the + * ClientLogger until logging is needed. Some implementations of SLF4J may make calls to load FileSystemProviders + * which results in a load FileSystemProviders to occur during a call to load FileSystemProviders. This results in + * the JVM to throw an exception that a circular call to load FileSystemProviders has occurred. + */ + private static final class ClientLoggerHolder { + private static final ClientLogger LOGGER = new ClientLogger(AzureFileSystemProvider.class); + } + + /** + * A helper for setting the HTTP properties when creating a directory. + */ + public static final String CONTENT_TYPE = "Content-Type"; + + /** + * A helper for setting the HTTP properties when creating a directory. + */ + public static final String CONTENT_DISPOSITION = "Content-Disposition"; + + /** + * A helper for setting the HTTP properties when creating a directory. + */ + public static final String CONTENT_LANGUAGE = "Content-Language"; + + /** + * A helper for setting the HTTP properties when creating a directory. + */ + public static final String CONTENT_ENCODING = "Content-Encoding"; + + /** + * A helper for setting the HTTP properties when creating a directory. + */ + public static final String CONTENT_MD5 = "Content-MD5"; + + /** + * A helper for setting the HTTP properties when creating a directory. + */ + public static final String CACHE_CONTROL = "Cache-Control"; + + private static final String ENDPOINT_QUERY_KEY = "endpoint"; + private static final int COPY_TIMEOUT_SECONDS = 30; + private static final Set OUTPUT_STREAM_DEFAULT_OPTIONS = + Collections.unmodifiableSet(new HashSet<>(Arrays.asList(StandardOpenOption.CREATE, + StandardOpenOption.WRITE, + StandardOpenOption.TRUNCATE_EXISTING))); + private static final Set OUTPUT_STREAM_SUPPORTED_OPTIONS = + Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + StandardOpenOption.CREATE_NEW, + StandardOpenOption.CREATE, + StandardOpenOption.WRITE, + // Though we don't actually truncate, the same result is achieved by overwriting the destination. + StandardOpenOption.TRUNCATE_EXISTING))); + + private final ConcurrentMap openFileSystems; + + + // Specs require a public zero argument constructor. + /** + * Creates an AzureFileSystemProvider. + */ + public AzureFileSystemProvider() { + this.openFileSystems = new ConcurrentHashMap<>(); + } + + /** + * Returns the URI scheme that identifies this provider: {@code "azb".} + * + * @return {@code "azb"} + */ + @Override + public String getScheme() { + return "azb"; + } + + /** + * Constructs a new FileSystem object identified by a URI. + *

+ * The format of a {@code URI} identifying a file system is {@code "azb://?endpoint=<endpoint>"}. + *

+ * Once closed, a file system with the same identifier may be reopened. + * + * @param uri URI reference + * @param config A map of provider specific properties to configure the file system + * @return a new file system. + * @throws IllegalArgumentException If the pre-conditions for the uri parameter aren't met, or the env parameter + * does not contain properties required by the provider, or a property value is invalid. + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + * @throws FileSystemAlreadyExistsException If the file system has already been created. + */ + @Override + public FileSystem newFileSystem(URI uri, Map config) throws IOException { + String endpoint = extractAccountEndpoint(uri); + + if (this.openFileSystems.containsKey(endpoint)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new FileSystemAlreadyExistsException("Name: " + endpoint)); + } + + AzureFileSystem afs = new AzureFileSystem(this, endpoint, config); + this.openFileSystems.put(endpoint, afs); + + return afs; + } + + /** + * Returns an existing FileSystem created by this provider. + *

+ * The format of a {@code URI} identifying a file system is {@code "azb://?endpoint=<endpoint>"}. + *

+ * Trying to retrieve a closed file system will throw a {@link FileSystemNotFoundException}. Once closed, a + * file system with the same identifier may be reopened. + * + * @param uri URI reference + * @return the file system + * @throws IllegalArgumentException If the pre-conditions for the uri parameter aren't met + * @throws FileSystemNotFoundException If the file system does not exist + * @throws SecurityException never + */ + @Override + public FileSystem getFileSystem(URI uri) { + String endpoint = extractAccountEndpoint(uri); + if (!this.openFileSystems.containsKey(endpoint)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new FileSystemNotFoundException("Name: " + endpoint)); + } + return this.openFileSystems.get(endpoint); + } + + /** + * Return a Path object by converting the given URI. The resulting Path is associated with a FileSystem that already + * exists. + * + * @param uri The URI to convert + * @return The path identified by the URI. + * @throws IllegalArgumentException If the URI scheme does not identify this provider or other preconditions on the + * uri parameter do not hold + * @throws FileSystemNotFoundException if the file system identified by the query does not exist + * @throws SecurityException never + * + * @see #getFileSystem(URI) for information on the URI format + */ + @Override + public Path getPath(URI uri) { + return getFileSystem(uri).getPath(uri.getPath()); + } + + /** + * Opens or creates a file, returning a seekable byte channel to access the file. + *

+ * This method is primarily offered to support some jdk convenience methods such as + * {@link Files#createFile(Path, FileAttribute[])} which requires opening a channel and closing it. A channel may + * only be opened in read mode OR write mode. It may not be opened in read/write mode. Seeking is supported for + * reads, but not for writes. Modifications to existing files are not permitted--only creating new files or + * overwriting existing files. + *
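A minimal sketch of the read-only channel behavior described above; blobPath is assumed to resolve to an existing blob on an open azb file system:

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

// Minimal sketch: open a read-only channel and seek before reading.
final class ChannelSketch {
    static void readAt(Path blobPath, long offset) throws IOException {
        try (SeekableByteChannel channel = Files.newByteChannel(blobPath, StandardOpenOption.READ)) {
            channel.position(offset);                  // seeking is supported for reads
            ByteBuffer buffer = ByteBuffer.allocate(8 * 1024);
            int read = channel.read(buffer);           // read a single chunk starting at the offset
            System.out.println("read " + read + " bytes at offset " + offset);
        }
    }
}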

+ * This type is not threadsafe to prevent having to hold locks across network calls. + * + * @param path the path of the file to open + * @param set options specifying how the file should be opened + * @param fileAttributes an optional list of file attributes to set atomically when creating the directory + * @return a new seekable byte channel + * @throws UnsupportedOperationException Operation is not supported. + * @throws IllegalArgumentException if the set contains an invalid combination of options + * @throws FileAlreadyExistsException if a file of that name already exists and the CREATE_NEW option is specified + * (optional specific exception) + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public SeekableByteChannel newByteChannel(Path path, Set set, + FileAttribute... fileAttributes) throws IOException { + if (Objects.isNull(set)) { + set = Collections.emptySet(); + } + + if (set.contains(StandardOpenOption.WRITE)) { + return new AzureSeekableByteChannel( + (NioBlobOutputStream) this.newOutputStreamInternal(path, set, fileAttributes), path); + } else { + return new AzureSeekableByteChannel( + (NioBlobInputStream) this.newInputStream(path, set.toArray(new OpenOption[0])), path); + } + } + + /** + * Opens an {@link InputStream} to the given path. + *

+ * The stream will not attempt to read or buffer the entire file. However, when fetching data, it will always + * request the same size chunk of several MB to prevent network thrashing on small reads. Mark and reset are + * supported. + *
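A minimal read sketch matching the description above, assuming blobPath points at an existing blob:

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

// Minimal sketch: re-read the first bytes of a blob using mark/reset.
final class InputStreamSketch {
    static void peek(Path blobPath) throws IOException {
        try (InputStream in = Files.newInputStream(blobPath, StandardOpenOption.READ)) {
            in.mark(1024);                  // mark and reset are supported
            byte[] header = new byte[16];
            int read = in.read(header);
            in.reset();                     // rewind to the mark before the real read
            System.out.println("peeked " + read + " bytes");
        }
    }
}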

+ * Only {@link StandardOpenOption#READ} is supported. Any other option will throw. + * + * @param path the path to the file to open + * @param options options specifying how the file is opened + * @return a new input stream + * @throws IllegalArgumentException if an invalid combination of options is specified + * @throws UnsupportedOperationException if an unsupported option is specified + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public InputStream newInputStream(Path path, OpenOption... options) throws IOException { + // Validate options. Only read is supported. + if (options.length > 1 || (options.length > 0 && !options[0].equals(StandardOpenOption.READ))) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new UnsupportedOperationException("Only the read option is supported.")); + } + + AzureResource resource = new AzureResource(path); + AzurePath.ensureFileSystemOpen(resource.getPath()); + + // Ensure the path points to a file. + if (!resource.checkDirStatus().equals(DirectoryStatus.NOT_A_DIRECTORY)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IOException("Path either does not exist or points to a directory." + + "Path must point to a file. Path: " + path.toString())); + } + + // Note that methods on BlobInputStream are already synchronized. + return new NioBlobInputStream(resource.getBlobClient().openInputStream(), resource.getPath()); + } + + /** + * Opens an {@link OutputStream} to the given path. The resulting file will be stored as a block blob. + *

+ * The only supported options are {@link StandardOpenOption#CREATE}, {@link StandardOpenOption#CREATE_NEW}, + * {@link StandardOpenOption#WRITE}, {@link StandardOpenOption#TRUNCATE_EXISTING}. Any other options will throw an + * {@link UnsupportedOperationException}. {@code WRITE} and {@code TRUNCATE_EXISTING} must be specified or an + * {@link IllegalArgumentException} will be thrown. Hence, files cannot be updated, only overwritten completely. + *

+ * This stream will not attempt to buffer the entire file, however some buffering will be done for potential + * optimizations and to avoid network thrashing. Specifically, up to + * {@link AzureFileSystem#AZURE_STORAGE_PUT_BLOB_THRESHOLD} bytes will be buffered initially. If that threshold is + * exceeded, the data will be broken into chunks and sent in blocks, and writes will be buffered into sizes of + * {@link AzureFileSystem#AZURE_STORAGE_UPLOAD_BLOCK_SIZE}. The maximum number of buffers of this size to be + * allocated is defined by {@link AzureFileSystem#AZURE_STORAGE_MAX_CONCURRENCY_PER_REQUEST}, which also configures + * the level of parallelism with which we may write and thus may affect write speeds as well. + *

+ * The data is only committed when the stream is closed. Hence, data cannot be read from the destination until the + * stream is closed. When the close method returns, it is guaranteed that, barring any errors, the data is finalized + * and available for reading. + *

+ * Writing happens asynchronously. Bytes passed for writing are stored until either the threshold or block size are + * met at which time they are sent to the service. When the write method returns, there is no guarantee about which + * phase of this process the data is in other than it has been accepted and will be written. Again, closing will + * guarantee that the data is written and available. + *
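A minimal write sketch using the default option set (CREATE, WRITE, TRUNCATE_EXISTING); note that the content only becomes readable once close() returns:

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

// Minimal sketch: create or overwrite a blob; the content is committed when the stream closes.
final class OutputStreamSketch {
    static void write(Path blobPath, String content) throws IOException {
        try (OutputStream out = Files.newOutputStream(blobPath,
                StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
            out.write(content.getBytes(StandardCharsets.UTF_8));
            out.flush();                    // no-op for data transfer, but surfaces any earlier write failure
        }
    }
}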

+ * Flush is a no-op as regards data transfers, but it can be used to check the state of the stream for errors. + * This can be a useful tool because writing happens asynchronously, and therefore an error from a previous write + * may not otherwise be thrown unless the stream is flushed, closed, or written to again. + * + * @param path the path to the file to open or create + * @param options options specifying how the file is opened + * @return a new output stream + * @throws IllegalArgumentException if an invalid combination of options is specified + * @throws UnsupportedOperationException if an unsupported option is specified + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public OutputStream newOutputStream(Path path, OpenOption... options) throws IOException { + return newOutputStreamInternal(path, new HashSet<>(Arrays.asList(options))); + } + + OutputStream newOutputStreamInternal(Path path, Set optionsSet, + FileAttribute... fileAttributes) throws IOException { + // If options are empty, add Create, Write, TruncateExisting as defaults per nio docs. + if (optionsSet == null || optionsSet.size() == 0) { + optionsSet = OUTPUT_STREAM_DEFAULT_OPTIONS; + } + + // Check for unsupported options. + for (OpenOption option : optionsSet) { + if (!OUTPUT_STREAM_SUPPORTED_OPTIONS.contains(option)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new UnsupportedOperationException("Unsupported option: " + option.toString())); + } + } + + /* + Write must be specified. Either create_new or truncate must be specified. This is to ensure that no edits or + appends are allowed. + */ + if (!optionsSet.contains(StandardOpenOption.WRITE) + || !(optionsSet.contains(StandardOpenOption.TRUNCATE_EXISTING) + || optionsSet.contains(StandardOpenOption.CREATE_NEW))) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("Write and either CreateNew or TruncateExisting must be specified to open " + + "an OutputStream")); + } + + AzureResource resource = new AzureResource(path); + AzurePath.ensureFileSystemOpen(resource.getPath()); + DirectoryStatus status = resource.checkDirStatus(); + + // Cannot write to a directory. + if (DirectoryStatus.isDirectory(status)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IOException("Cannot open an OutputStream to a directory. Path: " + path.toString())); + } + + // Writing to an empty location requires a create option. + if (status.equals(DirectoryStatus.DOES_NOT_EXIST) + && !(optionsSet.contains(StandardOpenOption.CREATE) + || optionsSet.contains(StandardOpenOption.CREATE_NEW))) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IOException("Writing to an empty location requires a create option. Path: " + path.toString())); + } + + // Cannot write to an existing file if create new was specified. + if (status.equals(DirectoryStatus.NOT_A_DIRECTORY) && optionsSet.contains(StandardOpenOption.CREATE_NEW)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IOException("A file already exists at this location and " + + "CREATE_NEW was specified. Path: " + path.toString())); + } + + // Create options based on file system config + AzureFileSystem fs = (AzureFileSystem) (path.getFileSystem()); + Integer blockSize = fs.getBlockSize() == null ? null : fs.getBlockSize().intValue(); + Integer putBlobThreshold = fs.getPutBlobThreshold() == null ? 
null : fs.getPutBlobThreshold().intValue(); + ParallelTransferOptions pto = new ParallelTransferOptions(blockSize, fs.getMaxConcurrencyPerRequest(), null, + putBlobThreshold); + + // Add an extra etag check for create new + BlobRequestConditions rq = null; + if (optionsSet.contains(StandardOpenOption.CREATE_NEW)) { + rq = new BlobRequestConditions().setIfNoneMatch("*"); + } + + // For parsing properties and metadata + if (fileAttributes == null) { + fileAttributes = new FileAttribute[0]; + } + resource.setFileAttributes(Arrays.asList(fileAttributes)); + + return new NioBlobOutputStream(resource.getBlobOutputStream(pto, rq), resource.getPath()); + } + + /** + * Returns an {@link AzureDirectoryStream} for iterating over the contents of a directory. The elements returned by + * the directory stream's iterator are of type Path, each one representing an entry in the directory. The Path + * objects are obtained as if by resolving the name of the directory entry against dir. The entries returned by the + * iterator are filtered by the given filter. + *

+ * When not using the try-with-resources construct, the directory stream's close method should be invoked after + * iteration is completed to free any resources held for the open directory. + *
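A minimal listing sketch using try-with-resources, as recommended above; the filter shown only accepts entries whose names end in .txt:

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;

// Minimal sketch: iterate a directory once, closing the stream automatically.
final class ListingSketch {
    static void listTextBlobs(Path directory) throws IOException {
        DirectoryStream.Filter<Path> filter = entry -> entry.getFileName().toString().endsWith(".txt");
        try (DirectoryStream<Path> stream = Files.newDirectoryStream(directory, filter)) {
            for (Path entry : stream) {
                System.out.println(entry);
            }
        }
    }
}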

+ * Where the filter terminates due to an uncaught error or runtime exception then it is propagated to the hasNext or + * next method. Where an IOException is thrown, it results in the hasNext or next method throwing a + * DirectoryIteratorException with the IOException as the cause. + * + * @param path the path to the directory + * @param filter the directory stream filter + * @return a new and open {@code DirectoryStream} object + * @throws IllegalArgumentException If the path type is not an instance of {@link AzurePath}. + * @throws NotDirectoryException if the file could not otherwise be opened because it is not a directory + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public DirectoryStream newDirectoryStream(Path path, DirectoryStream.Filter filter) + throws IOException { + if (!(path instanceof AzurePath)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("This provider cannot operate on subtypes of Path other than AzurePath")); + } + AzurePath.ensureFileSystemOpen(path); + + /* + Ensure the path is a directory. Note that roots are always directories. The case of an invalid root will be + caught in instantiating the stream below. + + Possible optimization later is to save the result of the list call to use as the first list call inside the + stream rather than a list call for checking the status and a list call for listing. + */ + if (!((AzurePath) path).isRoot() && !(new AzureResource(path).checkDirectoryExists())) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new NotDirectoryException(path.toString())); + } + + return new AzureDirectoryStream((AzurePath) path, filter); + } + + /** + * Creates a new directory at the specified path. + *

+ * The existence of a directory in the {@code AzureFileSystem} is defined on two levels. Weak existence is + * defined by the presence of a non-zero number of blobs prefixed with the directory's path. This concept is also + * known as a virtual directory and enables the file system to work with containers that were pre-loaded + * with data by another source but need to be accessed by this file system. Strong existence is defined as + * the presence of an actual storage resource at the given path, which in the case of directories, is a zero-length + * blob whose name is the directory path with a particular metadata field indicating the blob's status as a + * directory. This is also known as a concrete directory. Directories created by this file system will + * strongly exist. Operations targeting directories themselves as the object (e.g. setting properties) will target + * marker blobs underlying concrete directories. Other operations (e.g. listing) will operate on the blob-name + * prefix. + *

+ * This method fulfills the nio contract of: "The check for the existence of the file and the creation of the + * directory if it does not exist are a single operation that is atomic with respect to all other filesystem + * activities that might affect the directory." More specifically, this method will atomically check for strong + * existence of another file or directory at the given path and fail if one is present. On the other hand, we + * only check for weak existence of the parent to determine if the given path is valid. Additionally, the + * action of checking whether the parent exists is not atomic with the creation of the directory. Note that + * while it is possible that the parent may be deleted between when the parent is determined to exist and the + * creation of the child, the creation of the child will always ensure the existence of a virtual parent, so the + * child will never be left floating and unreachable. The different checks on parent and child are due to limitations + * in the Storage service API. + *

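+ * For illustration (the paths shown are hypothetical; {@code fs} denotes an already-open {@link AzureFileSystem}), a directory is normally created through the standard {@link java.nio.file.Files} entry point:
+ * <pre>{@code
+ * Path dir = fs.getPath("mycontainer:", "parentDir", "childDir");
+ * Files.createDirectory(dir); // Creates a concrete directory; the parent must at least weakly exist.
+ * }</pre>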
+ * There may be some unintuitive behavior when working with directories in this file system, particularly virtual + * directories (usually those not created by this file system). A virtual directory will disappear as soon as all + * its children have been deleted. Furthermore, if a directory with the given path weakly exists at the time of + * calling this method, this method will still return success and create a concrete directory at the target + * location. In other words, it is possible to "double create" a directory if it first weakly exists and then is + * strongly created. This is both because it is impossible to atomically check if a virtual directory exists while + * creating a concrete directory and because such behavior will have minimal side effects--no files will be + * overwritten and the directory will still be available for writing as intended, though it may not be empty. This + * is not a complete list of such unintuitive behavior. + *

+ * This method will attempt to extract standard HTTP content headers from the list of file attributes to set them + * as blob headers. All other attributes will be set as blob metadata. The value of every attribute will be + * converted to a {@code String} except the Content-MD5 attribute which expects a {@code byte[]}. + * When extracting the content headers, the following strings will be used for comparison (constants for these + * values can be found on this type): + *
+ * Content-Type, Content-Disposition, Content-Language, Content-Encoding, Content-MD5, and Cache-Control. + *
+ * Note that these properties also have a particular semantic in that if one is specified, all are updated. In other + * words, if any of the above is set, all those that are not set will be cleared. See the + * Azure Docs for more + * information. + * + * @param path the directory to create + * @param fileAttributes an optional list of file attributes to set atomically when creating the directory + * @throws IllegalArgumentException If the path type is not an instance of {@link AzurePath}. + * @throws UnsupportedOperationException if the array contains an attribute that cannot be set atomically when + * creating the directory + * @throws FileAlreadyExistsException if a directory could not otherwise be created because a file of that name + * already exists + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public void createDirectory(Path path, FileAttribute... fileAttributes) throws IOException { + fileAttributes = fileAttributes == null ? new FileAttribute[0] : fileAttributes; + + // Get the destination for the directory. Will throw if path is a root. + AzureResource azureResource = new AzureResource(path); + AzurePath.ensureFileSystemOpen(azureResource.getPath()); + + // Check if parent exists. If it does, atomically check if a file already exists and create a new dir if not. + if (azureResource.checkParentDirectoryExists()) { + try { + azureResource.setFileAttributes(Arrays.asList(fileAttributes)) + .putDirectoryBlob(new BlobRequestConditions().setIfNoneMatch("*")); + } catch (BlobStorageException e) { + if (e.getStatusCode() == HttpURLConnection.HTTP_CONFLICT + && e.getErrorCode().equals(BlobErrorCode.BLOB_ALREADY_EXISTS)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new FileAlreadyExistsException(azureResource.getPath().toString())); + } else { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IOException("An error occurred when creating the directory", e)); + } + } + } else { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IOException("Parent directory does not exist for path: " + azureResource.getPath())); + } + } + + /** + * Deletes the specified resource. + *

+ * This method is not atomic with respect to other file system operations. It is possible to delete a file in use by + * another process, and doing so will not immediately invalidate any channels open to that file--they will simply + * start to fail. Root directories cannot be deleted even when empty. + * + * @param path the path to the file to delete + * @throws IllegalArgumentException If the path type is not an instance of {@link AzurePath}. + * @throws NoSuchFileException if the file does not exist + * @throws DirectoryNotEmptyException if the file is a directory and could not otherwise be deleted because the + * directory is not empty + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public void delete(Path path) throws IOException { + // Basic validation. Must be an AzurePath. Cannot be a root. + AzureResource azureResource = new AzureResource(path); + AzurePath.ensureFileSystemOpen(azureResource.getPath()); + + // Check directory status--possibly throw DirectoryNotEmpty or NoSuchFile. + DirectoryStatus dirStatus = azureResource.checkDirStatus(); + if (dirStatus.equals(DirectoryStatus.DOES_NOT_EXIST)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new NoSuchFileException(path.toString())); + } + if (dirStatus.equals(DirectoryStatus.NOT_EMPTY)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new DirectoryNotEmptyException(path.toString())); + } + + // After all validation has completed, delete the resource. + try { + azureResource.getBlobClient().delete(); + } catch (BlobStorageException e) { + if (e.getErrorCode().equals(BlobErrorCode.BLOB_NOT_FOUND)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new NoSuchFileException(path.toString())); + } + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new IOException(e)); + } + } + + /** + * Copies the resource at the source location to the destination. + *

+ * This method is not atomic with respect to other file system operations. More specifically, the checks necessary + * to validate the inputs and state of the file system are not atomic with the actual copying of data. If the copy + * is triggered, the copy itself is atomic and only a complete copy will ever be left at the destination. + *

+ * In addition to those in the docs for {@link FileSystemProvider#copy(Path, Path, CopyOption...)}, this method has + * the following requirements for successful completion. {@link StandardCopyOption#COPY_ATTRIBUTES} is assumed as a + * default, as it is impossible not to copy blob properties; it may still be passed explicitly and is handled safely. + * Neither the source nor the destination can be a root + * directory; if either is a root directory, an {@link IllegalArgumentException} will be thrown. The parent + * directory of the destination must at least weakly exist; if it does not, an {@link IOException} will be thrown. + * The only supported option other than {@link StandardCopyOption#COPY_ATTRIBUTES} is + * {@link StandardCopyOption#REPLACE_EXISTING}; the presence of any other option will result in an + * {@link UnsupportedOperationException}. + *

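+ * For example (paths are illustrative; {@code fs} denotes an already-open {@link AzureFileSystem}), a typical copy through {@link java.nio.file.Files} looks like the following; {@code REPLACE_EXISTING} is only needed when the destination may already exist:
+ * <pre>{@code
+ * Path source = fs.getPath("mycontainer:", "dir", "sourceBlob");
+ * Path destination = fs.getPath("mycontainer:", "dir", "destinationBlob");
+ * Files.copy(source, destination, StandardCopyOption.COPY_ATTRIBUTES, StandardCopyOption.REPLACE_EXISTING);
+ * }</pre>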
+ * This method supports both virtual and concrete directories as both the source and destination. Unlike when + * creating a directory, the existence of a virtual directory at the destination will cause this operation to fail. + * This is in order to prevent the possibility of overwriting a non-empty virtual directory with a file. Still, as + * mentioned above, this check is not atomic with the creation of the resultant directory. + * + * @param source the path to the file to copy + * @param destination the path to the target file + * @param copyOptions specifying how the copy should be done + * @throws UnsupportedOperationException if the array contains a copy option that is not supported + * @throws FileAlreadyExistsException if the target file exists but cannot be replaced because the REPLACE_EXISTING + * option is not specified + * @throws DirectoryNotEmptyException the REPLACE_EXISTING option is specified but the file cannot be replaced + * because it is a non-empty directory + * @throws IOException If an I/O error occurs. + * @throws IllegalArgumentException If the path type is not an instance of {@link AzurePath}. + * @throws SecurityException never + * @see #createDirectory(Path, FileAttribute[]) for more information about directory existence. + */ + @Override + public void copy(Path source, Path destination, CopyOption... copyOptions) throws IOException { + // If paths point to the same file, operation is a no-op. + if (source.equals(destination)) { + return; + } + + // Read and validate options. + // Remove accepted options as we find them. Anything left we don't support. + boolean replaceExisting = false; + List optionsList = new ArrayList<>(Arrays.asList(copyOptions)); +// NOTE: We're going to assume COPY_ATTRIBUTES as a default copy option (but can still be provided and handled safely) +// REPLACE_EXISTING must still be provided if you want to replace existing file + +// if (!optionsList.contains(StandardCopyOption.COPY_ATTRIBUTES)) { +// throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new UnsupportedOperationException( +// "StandardCopyOption.COPY_ATTRIBUTES must be specified as the service will always copy " +// + "file attributes.")); +// } + if(optionsList.contains(StandardCopyOption.COPY_ATTRIBUTES)) { + optionsList.remove(StandardCopyOption.COPY_ATTRIBUTES); + } + + if (optionsList.contains(StandardCopyOption.REPLACE_EXISTING)) { + replaceExisting = true; + optionsList.remove(StandardCopyOption.REPLACE_EXISTING); + } + + if (!optionsList.isEmpty()) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new UnsupportedOperationException("Unsupported copy option found. Only " + + "StandardCopyOption.COPY_ATTRIBUTES and StandardCopyOption.REPLACE_EXISTING are supported.")); + } + + // Validate paths. Build resources. + // Copying a root directory or attempting to create/overwrite a root directory is illegal. + AzureResource sourceRes = new AzureResource(source); + AzurePath.ensureFileSystemOpen(sourceRes.getPath()); + AzureResource destinationRes = new AzureResource(destination); + AzurePath.ensureFileSystemOpen(destinationRes.getPath()); + + // Check destination is not a directory with children. + DirectoryStatus destinationStatus = destinationRes.checkDirStatus(); + if (destinationStatus.equals(DirectoryStatus.NOT_EMPTY)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new DirectoryNotEmptyException(destination.toString())); + } + + /* + Set request conditions if we should not overwrite. 
We can error out here if we know something already exists, + but we will also create request conditions as a safeguard against overwriting something that was created + between our check and put. + */ + BlobRequestConditions requestConditions = null; + if (!replaceExisting) { + if (!destinationStatus.equals(DirectoryStatus.DOES_NOT_EXIST)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new FileAlreadyExistsException(destinationRes.getPath().toString())); + } + requestConditions = new BlobRequestConditions().setIfNoneMatch("*"); + } + + /* + More path validation + + Check that the parent for the destination exists. We only need to perform this check if there is nothing + currently at the destination, for if the destination exists, its parent at least weakly exists and we + can skip a service call. + */ + if (destinationStatus.equals(DirectoryStatus.DOES_NOT_EXIST) && !destinationRes.checkParentDirectoryExists()) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IOException("Parent directory of destination location does not exist. The destination path is " + + "therefore invalid. Destination: " + destinationRes.getPath())); + } + + /* + Try to copy the resource at the source path. + + There is an optimization here where we try to do the copy first and only check for a virtual directory if + there's a 404. In the cases of files and concrete directories, this only requires one request. For virtual + directories, however, this requires three requests: failed copy, check status, create directory. Depending on + customer scenarios and how many virtual directories they copy, it could be better to check the directory status + first and then do a copy or createDir, which would always be two requests for all resource types. + */ + + try { + /* + Format the url by appending the SAS token as a param, otherwise the copy request will fail. + AzureFileSystem has been updated to handle url transformation via createSASAuthorizedURL() + */ + AzureFileSystem afs = (AzureFileSystem) sourceRes.getPath().getFileSystem(); + String sasAppendedSourceUrl = afs.createSASAppendedURL(sourceRes.getBlobClient().getBlobUrl()); + SyncPoller pollResponse = + destinationRes.getBlobClient().beginCopy(sasAppendedSourceUrl, null, null, null, + null, requestConditions, null); + pollResponse.waitForCompletion(Duration.ofSeconds(COPY_TIMEOUT_SECONDS)); + } catch (BlobStorageException e) { + // If the source was not found, it could be because it's a virtual directory. Check the status. + // If a non-dir resource existed, it would have been copied above. This check is therefore sufficient. + if (e.getErrorCode().equals(BlobErrorCode.BLOB_NOT_FOUND) + && !sourceRes.checkDirStatus().equals(DirectoryStatus.DOES_NOT_EXIST)) { + /* + We already checked that the parent exists and validated the paths above, so we can put the blob + directly. + */ + destinationRes.putDirectoryBlob(requestConditions); + } else { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new IOException(e)); + } + } catch (RuntimeException e) { // To better log possible timeout from poller. + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new IOException(e)); + } + } + + // Used for checking the status of the root directory. To be implemented later when needed. + /*int checkRootDirStatus(BlobContainerClient rootClient) { + + }*/ + + /** + * Unsupported. + * + * @param path path + * @param path1 path + * @param copyOptions options + * @throws UnsupportedOperationException Operation is not supported. 
+ */ + @Override + public void move(Path path, Path path1, CopyOption... copyOptions) throws IOException { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new UnsupportedOperationException()); + } + + /** + * Unsupported. + * + * @param path path + * @param path1 path + * @throws UnsupportedOperationException Operation is not supported. + */ + @Override + public boolean isSameFile(Path path, Path path1) throws IOException { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new UnsupportedOperationException()); + } + + /** + * Always returns false as hidden files are not supported. + * + * @param path the path + * @return false + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public boolean isHidden(Path path) throws IOException { + return false; + } + + /** + * Unsupported. + * + * @param path path + * @return the file store where the file is stored. + * @throws UnsupportedOperationException Operation is not supported. + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public FileStore getFileStore(Path path) throws IOException { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new UnsupportedOperationException()); + } + + /** + * Checks the existence, and optionally the accessibility, of a file. + *

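+ * For example, existence can be probed by calling this method with no access modes (an illustrative sketch only):
+ * <pre>{@code
+ * try {
+ *     path.getFileSystem().provider().checkAccess(path);
+ *     // The blob or directory exists.
+ * } catch (NoSuchFileException e) {
+ *     // The blob or directory does not exist.
+ * }
+ * }</pre>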
+ * This method may only be used to check the existence of a file. It is not possible to determine the permissions + * granted to a given client, so if any mode argument is specified, an {@link java.nio.file.AccessDeniedException} will be + * thrown. + * + * @param path the path to the file to check + * @param accessModes The access modes to check; may have zero elements + * @throws NoSuchFileException if a file does not exist + * @throws java.nio.file.AccessDeniedException if the requested access would be denied or the access cannot be + * determined because the Java virtual machine has insufficient privileges or other reasons + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public void checkAccess(Path path, AccessMode... accessModes) throws IOException { + if (accessModes != null && accessModes.length != 0) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new AccessDeniedException("The access cannot be determined.")); + } + AzurePath.ensureFileSystemOpen(path); + + /* + Some static utility methods in the jdk require checking access on a root. ReadAttributes is not supported on + roots as they are containers. Furthermore, we always assume that roots exist as they are verified at creation + and cannot be deleted by the file system. Thus, we prefer a short circuit for roots. + */ + if (path instanceof AzurePath && ((AzurePath) path).isRoot()) { + return; + } + + // Read attributes already wraps BlobStorageException in an IOException. + try { + readAttributes(path, BasicFileAttributes.class); + } catch (IOException e) { + Throwable cause = e.getCause(); + if (cause instanceof BlobStorageException + && BlobErrorCode.BLOB_NOT_FOUND.equals(((BlobStorageException) cause).getErrorCode())) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new NoSuchFileException(path.toString())); + } else { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, e); + } + } + } + + /** + * Returns a file attribute view of a given type. + *

+ * See {@link AzureBasicFileAttributeView} and {@link AzureBlobFileAttributeView} for more information. + *

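+ * For example (illustrative), a blob-specific view can be requested through {@link java.nio.file.Files}:
+ * <pre>{@code
+ * AzureBlobFileAttributeView view = Files.getFileAttributeView(path, AzureBlobFileAttributeView.class);
+ * }</pre>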
+ * Reading attributes on a virtual directory will return {@code null} for most properties other than + * {@link AzureBlobFileAttributes#isVirtualDirectory()}, which will return true. See + * {@link #createDirectory(Path, FileAttribute[])} for more information on virtual directories. + * + * @param path the path to the file + * @param type the Class object corresponding to the file attribute view + * @param linkOptions ignored + * @return a file attribute view of the specified type, or null if the attribute view type is not available + */ + @Override + @SuppressWarnings("unchecked") + public <V extends FileAttributeView> V getFileAttributeView(Path path, Class<V> type, LinkOption... linkOptions) { + /* + No resource validation is necessary here. That can happen at the time of making a network request internal to + the view object. + */ + if (type == BasicFileAttributeView.class || type == AzureBasicFileAttributeView.class) { + return (V) new AzureBasicFileAttributeView(path); + } else if (type == AzureBlobFileAttributeView.class) { + return (V) new AzureBlobFileAttributeView(path); + } else { + return null; + } + } + + /** + * Reads a file's attributes as a bulk operation. + *

+ * See {@link AzureBasicFileAttributes} and {@link AzureBlobFileAttributes} for more information. + *

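+ * For example (illustrative), blob-specific attributes can be read through {@link java.nio.file.Files}:
+ * <pre>{@code
+ * AzureBlobFileAttributes attrs = Files.readAttributes(path, AzureBlobFileAttributes.class);
+ * boolean virtual = attrs.isVirtualDirectory();
+ * }</pre>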
+ * Reading attributes on a virtual directory will return {@code null} for most properties other than + * {@link AzureBlobFileAttributes#isVirtualDirectory()}, which will return true. See + * {@link #createDirectory(Path, FileAttribute[])} for more information on virtual directories. + * + * @param path the path to the file + * @param type the Class of the file attributes required to read + * @param linkOptions ignored + * @return the file attributes + * @throws UnsupportedOperationException if an attributes of the given type are not supported + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + @SuppressWarnings("unchecked") + public A readAttributes(Path path, Class type, LinkOption... linkOptions) + throws IOException { + AzurePath.ensureFileSystemOpen(path); + + Class view; + if (type == BasicFileAttributes.class || type == AzureBasicFileAttributes.class) { + view = AzureBasicFileAttributeView.class; + } else if (type == AzureBlobFileAttributes.class) { + view = AzureBlobFileAttributeView.class; + } else { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new UnsupportedOperationException()); + } + + /* + Resource validation will happen in readAttributes of the view. We don't want to double-check, and checking + internal to the view ensures it is always checked no matter which code path is taken. + */ + return (A) getFileAttributeView(path, view, linkOptions).readAttributes(); + } + + /** + * Reads a set of file attributes as a bulk operation. + *

+ * See {@link AzureBasicFileAttributes} and {@link AzureBlobFileAttributes} for more information. + *

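+ * For example (illustrative; the attribute names shown are standard basic-view properties):
+ * <pre>{@code
+ * Map<String, Object> attrs = Files.readAttributes(path, "basic:size,lastModifiedTime,isDirectory");
+ * long size = (Long) attrs.get("size");
+ * }</pre>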
+ * Reading attributes on a virtual directory will return {@code null} for all properties other than + * {@link AzureBlobFileAttributes#isVirtualDirectory()}, which will return true. See + * {@link #createDirectory(Path, FileAttribute[])} for more information on virtual directories. + * + * @param path the path to the file + * @param attributes the attributes to read + * @param linkOptions ignored + * @return a map of the attributes returned; may be empty. The map's keys are the attribute names, its values are + * the attribute values + * @throws UnsupportedOperationException if an attributes of the given type are not supported + * @throws IllegalArgumentException if no attributes are specified or an unrecognized attributes is specified + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public Map readAttributes(Path path, String attributes, LinkOption... linkOptions) + throws IOException { + if (attributes == null) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("Attribute string cannot be null.")); + } + + AzurePath.ensureFileSystemOpen(path); + + Map results = new HashMap<>(); + + /* + AzureBlobFileAttributes can do everything the basic attributes can do and more. There's no need to instantiate + one of each if both are specified somewhere in the list as that will waste a network call. This can be + generified later if we need to add more attribute types, but for now we can stick to just caching the supplier + for a single attributes object. + */ + Map> attributeSuppliers = null; // Initialized later as needed. + String viewType; + String attributeList; + String[] parts = attributes.split(":"); + + if (parts.length > 2) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("Invalid format for attribute string: " + attributes)); + } + + if (parts.length == 1) { + viewType = "basic"; // Per jdk docs. + attributeList = attributes; + } else { + viewType = parts[0]; + attributeList = parts[1]; + } + + /* + For specificity, our basic implementation of BasicFileAttributes uses the name azureBasic. However, the docs + state that "basic" must be supported, so we funnel to azureBasic. + */ + if ("basic".equals(viewType)) { + viewType = AzureBasicFileAttributeView.NAME; + } + if (!viewType.equals(AzureBasicFileAttributeView.NAME) && !viewType.equals(AzureBlobFileAttributeView.NAME)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new UnsupportedOperationException("Invalid attribute view: " + viewType)); + } + + for (String attributeName : attributeList.split(",")) { + /* + We rely on the azureBlobFAV to actually do the work here as mentioned above, but if basic is specified, we + should at least validate that the attribute is available on a basic view. + */ + // TODO: Put these strings in constants + if (viewType.equals(AzureBasicFileAttributeView.NAME)) { + if (!AzureBasicFileAttributes.ATTRIBUTE_STRINGS.contains(attributeName) && !"*".equals(attributeName)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("Invalid attribute. View: " + viewType + + ". Attribute: " + attributeName)); + } + } + + // As mentioned, azure blob can fulfill requests to both kinds of views. + // Populate the supplier if we haven't already. 
+ if (attributeSuppliers == null) { + attributeSuppliers = AzureBlobFileAttributes.getAttributeSuppliers( + this.readAttributes(path, AzureBlobFileAttributes.class, linkOptions)); + } + + // If "*" is specified, add all the attributes from the specified set. + if ("*".equals(attributeName)) { + if (viewType.equals(AzureBasicFileAttributeView.NAME)) { + for (String attr : AzureBasicFileAttributes.ATTRIBUTE_STRINGS) { + results.put(attr, attributeSuppliers.get(attr).get()); + } + } else { + // attributeSuppliers is guaranteed to have been set by this point. + for (Map.Entry> entry: attributeSuppliers.entrySet()) { + results.put(entry.getKey(), entry.getValue().get()); + } + } + + } else if (!attributeSuppliers.containsKey(attributeName)) { + // Validate that the attribute is legal and add the value returned by the supplier to the results. + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("Invalid attribute. View: " + viewType + + ". Attribute: " + attributeName)); + } else { + results.put(attributeName, attributeSuppliers.get(attributeName).get()); + + } + } + + // Throw if nothing specified per jdk docs. + if (results.isEmpty()) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("No attributes were specified. Attributes: " + attributes)); + } + + return results; + } + + /** + * Sets the value of a file attribute. + *

+ * See {@link AzureBlobFileAttributeView} for more information. + *

+ * Setting attributes on a virtual directory is not supported and will throw an {@link IOException}. See + * {@link #createDirectory(Path, FileAttribute[])} for more information on virtual directories. + * + * @param path the path to the file + * @param attributes the attribute to set + * @param value the attribute value + * @param linkOptions ignored + * @throws UnsupportedOperationException if an attribute view is not available + * @throws IllegalArgumentException if the attribute name is not specified, or is not recognized, or the attribute + * value is of the correct type but has an inappropriate value + * @throws ClassCastException If the attribute value is not of the expected type or is a collection containing + * elements that are not of the expected type + * @throws IOException If an I/O error occurs. + * @throws SecurityException never + */ + @Override + public void setAttribute(Path path, String attributes, Object value, LinkOption... linkOptions) throws IOException { + AzurePath.ensureFileSystemOpen(path); + String viewType; + String attributeName; + String[] parts = attributes.split(":"); + if (parts.length > 2) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("Invalid format for attribute string: " + attributes)); + } + if (parts.length == 1) { + viewType = "basic"; // Per jdk docs. + attributeName = attributes; + } else { + viewType = parts[0]; + attributeName = parts[1]; + } + + /* + For specificity, our basic implementation of BasicFileAttributes uses the name azureBasic. However, the docs + state that "basic" must be supported, so we funnel to azureBasic. + */ + if ("basic".equals(viewType)) { + viewType = AzureBasicFileAttributeView.NAME; + } + + // We don't actually support any setters on the basic view. + if (viewType.equals(AzureBasicFileAttributeView.NAME)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("Invalid attribute. View: " + viewType + + ". Attribute: " + attributeName)); + } else if (viewType.equals(AzureBlobFileAttributeView.NAME)) { + Map> attributeConsumers = AzureBlobFileAttributeView.setAttributeConsumers( + this.getFileAttributeView(path, AzureBlobFileAttributeView.class, linkOptions)); + if (!attributeConsumers.containsKey(attributeName)) { + // Validate that the attribute is legal and add the value returned by the supplier to the results. + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("Invalid attribute. View: " + viewType + + ". Attribute: " + attributeName)); + } + try { + attributeConsumers.get(attributeName).accept(value); + } catch (UncheckedIOException e) { + if (e.getMessage().equals(AzureBlobFileAttributeView.ATTR_CONSUMER_ERROR)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, e.getCause()); + } + } + } else { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new UnsupportedOperationException("Invalid attribute view: " + viewType)); + } + } + + void closeFileSystem(String fileSystemName) { + this.openFileSystems.remove(fileSystemName); + } + + private String extractAccountEndpoint(URI uri) { + if (!uri.getScheme().equals(this.getScheme())) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, new IllegalArgumentException( + "URI scheme does not match this provider")); + } + if (CoreUtils.isNullOrEmpty(uri.getQuery())) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("URI does not contain a query component. 
FileSystems require a URI of " + + "the format \"azb://?endpoint=\".")); + } + + String endpoint = Flux.fromArray(uri.getQuery().split("&")) + .filter(s -> s.startsWith(ENDPOINT_QUERY_KEY + "=")) + .switchIfEmpty(Mono.defer(() -> Mono.error(LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("URI does not contain an \"" + ENDPOINT_QUERY_KEY + "=\" parameter. " + + "FileSystems require a URI of the format \"azb://?endpoint=\""))))) + .map(s -> s.substring(ENDPOINT_QUERY_KEY.length() + 1)) // Trim the query key and = + .blockLast(); + + if (CoreUtils.isNullOrEmpty(endpoint)) { + throw LoggingUtility.logError(ClientLoggerHolder.LOGGER, + new IllegalArgumentException("No account endpoint provided in URI query.")); + } + + return endpoint; + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzurePath.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzurePath.java new file mode 100644 index 00000000000..917895ba39e --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzurePath.java @@ -0,0 +1,836 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.ClosedFileSystemException; +import java.nio.file.FileSystem; +import java.nio.file.InvalidPathException; +import java.nio.file.LinkOption; +import java.nio.file.Path; +import java.nio.file.WatchEvent; +import java.nio.file.WatchKey; +import java.nio.file.WatchService; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Deque; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.stream.Stream; + +import com.azure.core.util.logging.ClientLogger; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobUrlParts; + +/** + * An object that may be used to locate a file in a file system. + *

+ * The root component, if it is present, is the first element of the path and is denoted by a {@code ':'} as the last + * character. Hence, only one instance of {@code ':'} may appear in a path string, and it may only be the last character + * of the first element in the path. The root component is used to identify which container a path belongs to. All other + * path elements, including separators, are considered as the blob name. {@link AzurePath#fromBlobUrl} may + * be used to convert a typical http url pointing to a blob into an {@code AzurePath} object pointing to the same + * resource. + *

+ * Constructing a syntactically valid path does not ensure a resource exists at the given path. An error will + * not be thrown until trying to access an invalid resource, e.g. trying to access a resource that does not exist. + *

+ * Path names are case-sensitive. + *

+ * If a resource is accessed via a relative path, it will be resolved against the default directory of the file system. + * The default directory is as defined in the {@link AzureFileSystem} docs. + *

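+ * For example (container and blob names are illustrative; {@code fs} denotes an already-open {@link AzureFileSystem}), only the first of the following paths is absolute because only it carries a root component:
+ * <pre>{@code
+ * Path absolute = fs.getPath("mycontainer:", "dir", "file.txt"); // "mycontainer:/dir/file.txt"
+ * Path relative = fs.getPath("dir/file.txt"); // Resolved against the file system's default directory when accessed.
+ * }</pre>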
+ * Leading and trailing separators will be stripped from each component passed to + * {@link AzureFileSystem#getPath(String, String...)}. This has the effect of treating "foo/" as though it were simply + * "foo". + */ +public final class AzurePath implements Path { + private static final ClientLogger LOGGER = new ClientLogger(AzurePath.class); + static final String ROOT_DIR_SUFFIX = ":"; + + private final AzureFileSystem parentFileSystem; + private final String pathString; + + AzurePath(AzureFileSystem parentFileSystem, String first, String... more) { + this.parentFileSystem = parentFileSystem; + + /* + Break all strings into their respective elements and remove empty elements. This has the effect of stripping + any trailing, leading, or internal delimiters so there are no duplicates/empty elements when we join. + */ + List elements = new ArrayList<>(Arrays.asList(first.split(parentFileSystem.getSeparator()))); + if (more != null) { + for (String next : more) { + elements.addAll(Arrays.asList(next.split(parentFileSystem.getSeparator()))); + } + } + elements.removeIf(String::isEmpty); + + this.pathString = String.join(this.parentFileSystem.getSeparator(), elements); + + // Validate the path string by checking usage of the reserved character ROOT_DIR_SUFFIX. + for (int i = 0; i < elements.size(); i++) { + String element = elements.get(i); + /* + If there is a root component, it must be the first element. A root component takes the format of + ":". The ':', or ROOT_DIR_SUFFIX, if present, can only appear once, and can only be the last + character of the first element. + */ + if (i == 0) { + if (element.contains(ROOT_DIR_SUFFIX) && element.indexOf(ROOT_DIR_SUFFIX) < element.length() - 1) { + throw LoggingUtility.logError(LOGGER, new InvalidPathException(this.pathString, ROOT_DIR_SUFFIX + + " may only be used as the last character in the root component of a path")); + } + // No element besides the first may contain the ROOT_DIR_SUFFIX, as only the first element may be the root. + } else if (element.contains(ROOT_DIR_SUFFIX)) { + throw LoggingUtility.logError(LOGGER, new InvalidPathException(this.pathString, ROOT_DIR_SUFFIX + + " is an invalid character except to identify the root element of this path if there is one.")); + } + } + } + + /** + * Returns the file system that created this object. + * + * @return the file system that created this object + */ + @Override + public FileSystem getFileSystem() { + return this.parentFileSystem; + } + + /** + * Tells whether this path is absolute. + *

+ * An absolute path is complete in that it doesn't need to be combined with other path information in order to + * locate a file. A path is considered absolute in this file system if it contains a root component. + * + * @return whether the path is absolute + */ + @Override + public boolean isAbsolute() { + return this.getRoot() != null; + } + + /** + * Returns the root component of this path as a Path object, or null if this path does not have a root component. + *

+ * The root component of this path also identifies the Azure Storage Container in which the file is stored. This + * method will not validate that the root component corresponds to an actual file store/container in this + * file system. It will simply return the root component of the path if one is present and syntactically valid. + * + * @return a path representing the root component of this path, or null + */ + @Override + public Path getRoot() { + // Check if the first element of the path is formatted like a root directory. + String[] elements = this.splitToElements(); + if (elements.length > 0 && elements[0].endsWith(ROOT_DIR_SUFFIX)) { + return this.parentFileSystem.getPath(elements[0]); + } + return null; + } + + /** + * Returns the name of the file or directory denoted by this path as a Path object. The file name is the farthest + * element from the root in the directory hierarchy. + * + * @return a path representing the name of the file or directory, or null if this path has zero elements + */ + @Override + public Path getFileName() { + if (this.isRoot()) { + return null; + } else if (this.pathString.isEmpty()) { + return this; + } else { + List elements = Arrays.asList(this.splitToElements()); + return this.parentFileSystem.getPath(elements.get(elements.size() - 1)); + } + } + + /** + * Returns the parent path, or null if this path does not have a parent. + *

+ * The parent of this path object consists of this path's root component, if any, and each element in the path + * except for the farthest from the root in the directory hierarchy. This method does not access the file system; + * the path or its parent may not exist. Furthermore, this method does not eliminate special names such as "." and + * ".." that may be used in some implementations. On UNIX for example, the parent of "/a/b/c" is "/a/b", and the + * parent of "x/y/." is "x/y". This method may be used with the normalize method, to eliminate redundant names, for + * cases where shell-like navigation is required. + *

+ * If this path has one or more elements, and no root component, then this method is equivalent to evaluating the + * expression: + * + * {@code subpath(0, getNameCount()-1);} + * + * @return a path representing the path's parent + */ + @Override + public Path getParent() { + /* + If this path only has one element or is empty, there is no parent. Note the root is included in the parent, so + we don't use getNameCount here. + */ + String[] elements = this.splitToElements(); + if (elements.length == 1 || elements.length == 0) { + return null; + } + + return this.parentFileSystem.getPath( + this.pathString.substring(0, this.pathString.lastIndexOf(this.parentFileSystem.getSeparator()))); + } + + /** + * Returns the number of name elements in the path. + * + * @return the number of elements in the path, or 0 if this path only represents a root component + */ + @Override + public int getNameCount() { + if (this.pathString.isEmpty()) { + return 1; + } + return this.splitToElements(this.withoutRoot()).length; + } + + /** + * Returns a name element of this path as a Path object. + *

+ * The index parameter is the index of the name element to return. The element that is closest to the root in the + * directory hierarchy has index 0. The element that is farthest from the root has index {@code count-1}. + * + * @param index the index of the element + * @return the name element + * @throws IllegalArgumentException if index is negative, index is greater than or equal to the number of elements, + * or this path has zero name elements + */ + @Override + public Path getName(int index) { + if (index < 0 || index >= this.getNameCount()) { + throw LoggingUtility.logError(LOGGER, new IllegalArgumentException(String.format("Index %d is out of " + + "bounds", index))); + } + // If the path is empty, the only valid option is also an empty path. + if (this.pathString.isEmpty()) { + return this; + } + + return this.parentFileSystem.getPath(this.splitToElements(this.withoutRoot())[index]); + } + + /** + * Returns a relative Path that is a subsequence of the name elements of this path. + *

+ * The beginIndex and endIndex parameters specify the subsequence of name elements. The name that is closest to the + * root in the directory hierarchy has index 0. The name that is farthest from the root has index {@code count-1}. + * The returned Path object has the name elements that begin at beginIndex and extend to the element at index + * {@code endIndex-1}. + * + * @param begin the index of the first element, inclusive + * @param end the index of the last element, exclusive + * @return a new Path object that is a subsequence of the name elements in this Path + */ + @Override + public Path subpath(int begin, int end) { + if (begin < 0 || begin >= this.getNameCount() + || end <= begin || end > this.getNameCount()) { + throw LoggingUtility.logError(LOGGER, + new IllegalArgumentException(String.format("Values of begin: %d and end: %d are invalid", begin, end))); + } + + String[] subnames = Stream.of(this.splitToElements(this.withoutRoot())) + .skip(begin) + .limit(end - begin) + .toArray(String[]::new); + + return this.parentFileSystem.getPath(String.join(this.parentFileSystem.getSeparator(), subnames)); + } + + /** + * Tests if this path starts with the given path. + *

+ * This path starts with the given path if this path's root component starts with the root component of the given + * path, and this path starts with the same name elements as the given path. If the given path has more name + * elements than this path then false is returned. + *

+ * If this path does not have a root component and the given path has a root component then this path does not start + * with the given path. + *

+ * If the given path is associated with a different FileSystem to this path then false is returned. + *

+ * In this implementation, a root component starts with another root component if the two root components are + * equivalent strings. In other words, if the files are stored in the same container. + * + * @param path the given path + * @return true if this path starts with the given path; otherwise false + */ + @Override + public boolean startsWith(Path path) { + if (!path.getFileSystem().equals(this.parentFileSystem)) { + return false; + } + + // An empty path never starts with another path and is never the start of another path. + if (this.pathString.isEmpty() ^ ((AzurePath) path).pathString.isEmpty()) { + return false; + } + + String[] thisPathElements = this.splitToElements(); + String[] otherPathElements = ((AzurePath) path).splitToElements(); + if (otherPathElements.length > thisPathElements.length) { + return false; + } + for (int i = 0; i < otherPathElements.length; i++) { + if (!otherPathElements[i].equals(thisPathElements[i])) { + return false; + } + } + + return true; + } + + /** + * Tests if this path starts with a Path, constructed by converting the given path string, in exactly the manner + * specified by the startsWith(Path) method. + * + * @param path the given path string + * @return true if this path starts with the given path; otherwise false + * @throws InvalidPathException If the path string cannot be converted to a Path. + */ + @Override + public boolean startsWith(String path) { + return this.startsWith(this.parentFileSystem.getPath(path)); + } + + /** + * Tests if this path ends with the given path. + *

+ * If the given path has N elements, and no root component, and this path has N or more elements, then this path + * ends with the given path if the last N elements of each path, starting at the element farthest from the root, + * are equal. + *

+ * If the given path has a root component then this path ends with the given path if the root component of this path + * ends with the root component of the given path, and the corresponding elements of both paths are equal. If this + * path does not have a root component and the given path has a root component then this path does not end with the + * given path. + *

+ * If the given path is associated with a different FileSystem to this path then false is returned. + *

+ * In this implementation, a root component ends with another root component if the two root components are + * equivalent strings. In other words, if the files are stored in the same container. + * + * @param path the given path + * @return true if this path ends with the given path; otherwise false + */ + @Override + public boolean endsWith(Path path) { + /* + There can only be one instance of a file system with a given id, so comparing object identity is equivalent + to checking ids here. + */ + if (path.getFileSystem() != this.parentFileSystem) { + return false; + } + + // An empty path never ends with another path and is never the end of another path. + if (this.pathString.isEmpty() ^ ((AzurePath) path).pathString.isEmpty()) { + return false; + } + + String[] thisPathElements = this.splitToElements(); + String[] otherPathElements = ((AzurePath) path).splitToElements(); + if (otherPathElements.length > thisPathElements.length) { + return false; + } + // If the given path has a root component, the paths must be equal. + if (path.getRoot() != null && otherPathElements.length != thisPathElements.length) { + return false; + } + for (int i = 1; i <= otherPathElements.length; i++) { + if (!otherPathElements[otherPathElements.length - i] + .equals(thisPathElements[thisPathElements.length - i])) { + return false; + } + } + return true; + } + + /** + * Tests if this path ends with a Path, constructed by converting the given path string, in exactly the manner + * specified by the endsWith(Path) method. + * + * @param path the given path string + * @return true if this path starts with the given path; otherwise false + * @throws InvalidPathException If the path string cannot be converted to a Path. + */ + @Override + public boolean endsWith(String path) { + return this.endsWith(this.parentFileSystem.getPath(path)); + } + + /** + * Returns a path that is this path with redundant name elements eliminated. + *

+ * It derives from this path, a path that does not contain redundant name elements. The "." and ".." are special + * names used to indicate the current directory and parent directory. All occurrences of "." are considered + * redundant. If a ".." is preceded by a non-".." name then both names are considered redundant (the process to + * identify such names is repeated until it is no longer applicable). + *

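+ * For example (illustrative): {@code fs.getPath("mycontainer:", "a", ".", "b", "..", "c").normalize()} yields a path whose string form is {@code "mycontainer:/a/c"}.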
+ * This method does not access the file system; the path may not locate a file that exists. Eliminating ".." and a + * preceding name from a path may result in a path that locates a different file than the original path. + * + * @return the resulting path or this path if it does not contain redundant name elements; an empty path is returned + * if this path does not have a root component and all name elements are redundant + * + */ + @Override + public Path normalize() { + Deque<String> stack = new ArrayDeque<>(); + String[] pathElements = this.splitToElements(); + Path root = this.getRoot(); + String rootStr = root == null ? null : root.toString(); + for (String element : pathElements) { + if (".".equals(element)) { + continue; + } else if ("..".equals(element)) { + if (rootStr != null) { + // Root path. We never push "..". + if (!stack.isEmpty() && stack.peekLast().equals(rootStr)) { + // Cannot go higher than root. Ignore. + continue; + } else { + stack.removeLast(); + } + } else { + // Relative paths can have an arbitrary number of ".." at the beginning. + if (stack.isEmpty()) { + stack.addLast(element); + } else if (stack.peek().equals("..")) { + stack.addLast(element); + } else { + stack.removeLast(); + } + } + } else { + stack.addLast(element); + } + } + + return this.parentFileSystem.getPath("", stack.toArray(new String[0])); + } + + /** + * Resolve the given path against this path. + *

+ * If the other parameter is an absolute path then this method trivially returns other. If other is an empty path + * then this method trivially returns this path. Otherwise, this method considers this path to be a directory and + * resolves the given path against this path. In the simplest case, the given path does not have a root component, + * in which case this method joins the given path to this path and returns a resulting path that ends with the given + * path. Where the given path has a root component then resolution is highly implementation dependent and therefore + * unspecified. + * + * @param path the path to resolve against this path + * @return the resulting path + */ + @Override + public Path resolve(Path path) { + if (path.isAbsolute()) { + return path; + } + if (path.getNameCount() == 0) { + return this; + } + return this.parentFileSystem.getPath(this.toString(), path.toString()); + } + + /** + * Converts a given path string to a Path and resolves it against this Path in exactly the manner specified by the + * {@link #resolve(Path) resolve} method. + * + * @param path the path string to resolve against this path + * @return the resulting path + * @throws InvalidPathException if the path string cannot be converted to a Path. + */ + @Override + public Path resolve(String path) { + return this.resolve(this.parentFileSystem.getPath(path)); + } + + /** + * Resolves the given path against this path's parent path. This is useful where a file name needs to be replaced + * with another file name. For example, suppose that the name separator is "/" and a path represents + * "dir1/dir2/foo", then invoking this method with the Path "bar" will result in the Path "dir1/dir2/bar". If this + * path does not have a parent path, or other is absolute, then this method returns other. If other is an empty path + * then this method returns this path's parent, or where this path doesn't have a parent, the empty path. + * + * @param path the path to resolve against this path's parent + * @return the resulting path + */ + @Override + public Path resolveSibling(Path path) { + if (path.isAbsolute()) { + return path; + } + + Path parent = this.getParent(); + return parent == null ? path : parent.resolve(path); + } + + /** + * Converts a given path string to a Path and resolves it against this path's parent path in exactly the manner + * specified by the resolveSibling method. + * + * @param path the path string to resolve against this path's parent + * @return the resulting path + * @throws InvalidPathException if the path string cannot be converted to a Path. + */ + @Override + public Path resolveSibling(String path) { + return this.resolveSibling(this.parentFileSystem.getPath(path)); + } + + /** + * Constructs a relative path between this path and a given path. + *

+ * Relativization is the inverse of resolution. This method attempts to construct a relative path that when resolved + * against this path, yields a path that locates the same file as the given path. + *

+ * A relative path cannot be constructed if only one of the paths has a root component. If both paths have a root + * component, it is still possible to relativize one against the other. If this path and the given path are equal + * then an empty path is returned. + *

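+ * For example (illustrative): {@code fs.getPath("a/b").relativize(fs.getPath("a/b/c/d"))} yields {@code "c/d"}, and {@code fs.getPath("a/b/c").relativize(fs.getPath("a/b/x"))} yields {@code "../x"}.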
+ * For any two normalized paths p and q, where q does not have a root component, + * {@code p.relativize(p.resolve(q)).equals(q)} + * + * @param path the path to relativize against this path + * @return the resulting relative path, or an empty path if both paths are equal + * @throws IllegalArgumentException if other is not a Path that can be relativized against this path + */ + @Override + public Path relativize(Path path) { + if (path.getRoot() == null ^ this.getRoot() == null) { + throw LoggingUtility.logError(LOGGER, + new IllegalArgumentException("Both paths must be absolute or neither can be")); + } + + AzurePath thisNormalized = (AzurePath) this.normalize(); + Path otherNormalized = path.normalize(); + + Deque deque = new ArrayDeque<>( + Arrays.asList(otherNormalized.toString().split(this.parentFileSystem.getSeparator()))); + + int i = 0; + String[] thisElements = thisNormalized.splitToElements(); + while (i < thisElements.length && !deque.isEmpty() && thisElements[i].equals(deque.peekFirst())) { + deque.removeFirst(); + i++; + } + while (i < thisElements.length) { + deque.addFirst(".."); + i++; + } + + return this.parentFileSystem.getPath("", deque.toArray(new String[0])); + } + + /** + * Returns a URI to represent this path. + *

+ * This method constructs an absolute URI with a scheme equal to the URI scheme that identifies the provider. + *

+ * No authority component is defined for the {@code URI} returned by this method. This implementation offers the + * same equivalence guarantee as the default provider. + * + * @return the URI representing this path + * @throws SecurityException never + */ + @Override + public URI toUri() { + try { + return new URI(this.parentFileSystem.provider().getScheme(), null, "/" + this.toAbsolutePath(), + null, null); + } catch (URISyntaxException e) { + throw LoggingUtility.logError(LOGGER, new IllegalStateException("Unable to create valid URI from path", e)); + } + } + + /** + * Returns a Path object representing the absolute path of this path. + *

+ * If this path is already absolute then this method simply returns this path. Otherwise, this method resolves the + * path against the default directory. + * + * @return a Path object representing the absolute path + * @throws SecurityException never + */ + @Override + public Path toAbsolutePath() { + if (this.isAbsolute()) { + return this; + } + return this.parentFileSystem.getDefaultDirectory().resolve(this); + } + + /** + * Unsupported. + * + * @param linkOptions options + * @return the real path + * @throws UnsupportedOperationException operation not supported. + */ + @Override + public Path toRealPath(LinkOption... linkOptions) throws IOException { + throw new UnsupportedOperationException("Symbolic links are not supported."); + } + + /** + * Unsupported. + * + * @return the file + * @throws UnsupportedOperationException operation not supported. + */ + @Override + public File toFile() { + throw new UnsupportedOperationException(); + } + + /** + * Unsupported. + * + * @param watchService watchService + * @param kinds kinds + * @param modifiers modifiers + * @return the watch key + * @throws UnsupportedOperationException operation not supported. + */ + @Override + public WatchKey register(WatchService watchService, WatchEvent.Kind[] kinds, WatchEvent.Modifier... modifiers) + throws IOException { + throw new UnsupportedOperationException("WatchEvents are not supported."); + } + + /** + * Unsupported. + * + * @param watchService watchService + * @param kinds kinds + * @return the watch key + * @throws UnsupportedOperationException operation not supported. + */ + @Override + public WatchKey register(WatchService watchService, WatchEvent.Kind... kinds) throws IOException { + throw new UnsupportedOperationException("WatchEvents are not supported."); + } + + /** + * Returns an iterator over the name elements of this path. + *

+ * The first element returned by the iterator represents the name element that is closest to the root in the + * directory hierarchy, the second element is the next closest, and so on. The last element returned is the name of + * the file or directory denoted by this path. The root component, if present, is not returned by the iterator. + * + * @return an iterator over the name elements of this path. + */ + @Override + public Iterator iterator() { + if (this.pathString.isEmpty()) { + return Collections.singletonList((Path) this).iterator(); + } + return Arrays.asList(Stream.of(this.splitToElements(this.withoutRoot())) + .map(s -> this.parentFileSystem.getPath(s)) + .toArray(Path[]::new)) + .iterator(); + } + + /** + * Compares two abstract paths lexicographically. This method does not access the file system and neither file is + * required to exist. + *

+ * This method may not be used to compare paths that are associated with different file system providers. + *

+ * The result of this method is identical to a string comparison on the underlying path strings. + * + * @return zero if the argument is equal to this path, a value less than zero if this path is lexicographically less + * than the argument, or a value greater than zero if this path is lexicographically greater than the argument + * @throws ClassCastException if the paths are associated with different providers + */ + @Override + public int compareTo(Path path) { + if (!(path instanceof AzurePath)) { + throw LoggingUtility.logError(LOGGER, new ClassCastException("Other path is not an instance of " + + "AzurePath.")); + } + + return this.pathString.compareTo(((AzurePath) path).pathString); + } + + /** + * Returns the string representation of this path. + *

+ * If this path was created by converting a path string using the getPath method then the path string returned by + * this method may differ from the original String used to create the path. + *

+ * The returned path string uses the default name separator to separate names in the path. + * + * @return the string representation of this path + */ + @Override + public String toString() { + return this.pathString; + } + + /** + * A path is considered equal to another path if it is associated with the same file system instance and if the + * path strings are equivalent. + * + * @return true if, and only if, the given object is a Path that is identical to this Path + */ + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + AzurePath paths = (AzurePath) o; + return Objects.equals(parentFileSystem, paths.parentFileSystem) + && Objects.equals(pathString, paths.pathString); + } + + @Override + public int hashCode() { + return Objects.hash(parentFileSystem, pathString); + } + + /** + * Returns a {@link BlobClient} which references a blob pointed to by this path. Note that this does not guarantee + * the existence of the blob at this location. + * + * @return a {@link BlobClient}. + * @throws IOException If the path only contains a root component or is empty + */ + public BlobClient toBlobClient() throws IOException { + /* + We don't store the blob client because unlike other types in this package, a Path does not actually indicate the + existence or even validity of any remote resource. It is purely a representation of a path. Therefore, we do not + construct the client or perform any validation until it is requested. + */ + // Converting to an absolute path ensures there is a container to operate on even if it is the default. + // Normalizing ensures the path is clean. + Path root = this.normalize().toAbsolutePath().getRoot(); + if (root == null) { + throw LoggingUtility.logError(LOGGER, + new IllegalStateException("Root should never be null after calling toAbsolutePath.")); + } + String fileStoreName = this.rootToFileStore(root.toString()); + + BlobContainerClient containerClient = + ((AzureFileStore) this.parentFileSystem.getFileStore()).getContainerClient(); + + String blobName = this.withoutRoot(); + if (blobName.isEmpty()) { + throw LoggingUtility.logError(LOGGER, new IOException("Cannot get a blob client to a path that only " + + "contains the root or is an empty path")); + } + + return containerClient.getBlobClient(blobName); + } + + /** + * A utility method to conveniently convert from a URL to a storage resource to an {@code AzurePath} pointing to the + * same resource. + * + * The url must be well formatted. There must be an open filesystem corresponding to the account which contains the + * blob. Otherwise, a {@link java.nio.file.FileSystemNotFoundException} will be thrown. + * + * The url may point to either an account, container, or blob. If it points to an account, the path will be empty, + * but it will have an internal reference to the file system containing it, meaning instance methods may be + * performed on the path to construct a reference to another object. If it points to a container, there will be one + * element, which is the root element. Everything after the container, that is the blob name, will then be appended + * after the root element. + * + * IP style urls are not currently supported. + * + * The {@link AzureFileSystemProvider} can typically be obtained via {@link AzureFileSystem#provider()}. + * + * @param provider The installed {@link AzureFileSystemProvider} that manages open file systems for this jvm. + * @param url The url to the desired resource. 
+ * @return An {@link AzurePath} which points to the resource identified by the url. + * @throws URISyntaxException If the url contains elements which are not well formatted. + */ + public static AzurePath fromBlobUrl(AzureFileSystemProvider provider, String url) throws URISyntaxException { + BlobUrlParts parts = BlobUrlParts.parse(url); + URI fileSystemUri = hostToFileSystemUri(provider, parts.getScheme(), parts.getHost()); + FileSystem parentFileSystem = provider.getFileSystem(fileSystemUri); + return new AzurePath((AzureFileSystem) parentFileSystem, fileStoreToRoot(parts.getBlobContainerName()), + parts.getBlobName() == null ? "" : parts.getBlobName()); + } + + /** + * @return Whether this path consists of only a root component. + */ + boolean isRoot() { + return this.equals(this.getRoot()); + } + + private String withoutRoot() { + Path root = this.getRoot(); + String str = this.pathString; + if (root != null) { + str = this.pathString.substring(root.toString().length()); + } + if (str.startsWith(this.parentFileSystem.getSeparator())) { + str = str.substring(1); + } + + return str; + } + + private String[] splitToElements() { + return this.splitToElements(this.pathString); + } + + private String[] splitToElements(String str) { + String[] arr = str.split(this.parentFileSystem.getSeparator()); + /* + This is a special case where we split after removing the root from a path that is just the root. Or otherwise + have an empty path. + */ + if (arr.length == 1 && arr[0].isEmpty()) { + return new String[0]; + } + return arr; + } + + private String rootToFileStore(String root) { + return root.substring(0, root.length() - 1); // Remove the ROOT_DIR_SUFFIX + } + + private static String fileStoreToRoot(String fileStore) { + if (fileStore == null || "".equals(fileStore)) { + return ""; + } + return fileStore + ROOT_DIR_SUFFIX; + } + + private static URI hostToFileSystemUri(AzureFileSystemProvider provider, String scheme, String host) + throws URISyntaxException { + return new URI(provider.getScheme() + "://?endpoint=" + scheme + "://" + host); + } + + static void ensureFileSystemOpen(Path p) { + if (!p.getFileSystem().isOpen()) { + throw LoggingUtility.logError(LOGGER, new ClosedFileSystemException()); + } + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureResource.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureResource.java new file mode 100644 index 00000000000..92fb14a62cc --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureResource.java @@ -0,0 +1,284 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
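The two conversions documented above, blob URL to AzurePath via fromBlobUrl and AzurePath to BlobClient via toBlobClient, are easiest to see together. The following is a minimal sketch rather than code from this change: the endpoint, container, blob name, and credentials are placeholders, and it assumes a matching file system has already been opened through the provider, as the fromBlobUrl javadoc requires.

    import com.azure.storage.blob.BlobClient;
    import com.azure.storage.blob.nio.AzureFileSystem;
    import com.azure.storage.blob.nio.AzureFileSystemProvider;
    import com.azure.storage.blob.nio.AzurePath;
    import com.azure.storage.common.StorageSharedKeyCredential;
    import java.net.URI;
    import java.nio.file.FileSystem;
    import java.nio.file.FileSystems;
    import java.util.Map;

    public class AzurePathSketch {
        public static void main(String[] args) throws Exception {
            // Placeholder account, container, and credentials.
            Map<String, Object> config = Map.of(
                AzureFileSystem.AZURE_STORAGE_SHARED_KEY_CREDENTIAL,
                new StorageSharedKeyCredential("<account-name>", "<account-key>"),
                AzureFileSystem.AZURE_STORAGE_FILE_STORES, "mycontainer");

            try (FileSystem fs = FileSystems.newFileSystem(
                    new URI("azb://?endpoint=https://myaccount.blob.core.windows.net"), config)) {
                AzureFileSystemProvider provider = (AzureFileSystemProvider) fs.provider();

                // URL -> AzurePath. The owning file system must already be open, otherwise
                // a FileSystemNotFoundException is thrown.
                AzurePath path = AzurePath.fromBlobUrl(provider,
                    "https://myaccount.blob.core.windows.net/mycontainer/dir/file.txt");

                BlobClient client = path.toBlobClient(); // purely local; does not verify the blob exists
                URI uri = path.toUri();                  // absolute URI built from the provider's scheme
            }
        }
    }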
+ +package com.azure.storage.blob.nio; + +import com.azure.core.util.logging.ClientLogger; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.BlobContainerClientBuilder; +import com.azure.storage.blob.models.BlobHttpHeaders; +import com.azure.storage.blob.models.BlobItem; +import com.azure.storage.blob.models.BlobListDetails; +import com.azure.storage.blob.models.BlobProperties; +import com.azure.storage.blob.models.BlobRequestConditions; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.models.ListBlobsOptions; +import com.azure.storage.blob.models.ParallelTransferOptions; +import com.azure.storage.blob.options.BlockBlobOutputStreamOptions; +import com.azure.storage.blob.specialized.BlobOutputStream; +import com.azure.storage.common.implementation.Constants; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.attribute.FileAttribute; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * This type is meant to be a logical grouping of operations and data associated with an azure resource. It is NOT + * intended to serve as a local cache for any data related to remote resources. It is agnostic to whether the resource + * is a directory or a file and will not perform any validation of the resource type, though root directories are not + * supported as they are backed by containers and do not support many file system apis. + * + * It also serves as the interface to Storage clients. Any operation that needs to use a client should first build an + * AzureResource using a path and then use the getter to access the client. + */ +final class AzureResource { + private static final ClientLogger LOGGER = new ClientLogger(AzureResource.class); + + static final String DIR_METADATA_MARKER = Constants.HeaderConstants.DIRECTORY_METADATA_KEY; + + private final AzurePath path; + private final BlobClient blobClient; + + // The following are not kept consistent with the service. They are only held here between parsing and putting. + private BlobHttpHeaders blobHeaders; + private Map blobMetadata; + + AzureResource(Path path) throws IOException { + Objects.requireNonNull(path, "path"); + this.path = validatePathInstanceType(path); + this.validateNotRoot(); + this.blobClient = this.path.toBlobClient(); + } + + /** + * Checks for the existence of the parent of the given path. We do not check for the actual marker blob as parents + * need only weakly exist. + * + * If the parent is a root (container), it will be assumed to exist, so it must be validated elsewhere that the + * container is a legitimate root within this file system. + */ + boolean checkParentDirectoryExists() throws IOException { + /* + If the parent is just the root (or null, which means the parent is implicitly the default directory which is a + root), that means we are checking a container, which is always considered to exist. Otherwise, perform normal + existence check. + */ + Path parent = this.path.getParent(); + return (parent == null || parent.equals(path.getRoot())) + || new AzureResource(this.path.getParent()).checkDirectoryExists(); + } + + /** + * Checks whether a directory exists by either being empty or having children. 
+ */ + boolean checkDirectoryExists() throws IOException { + DirectoryStatus dirStatus = this.checkDirStatus(); + return dirStatus.equals(DirectoryStatus.EMPTY) || dirStatus.equals(DirectoryStatus.NOT_EMPTY); + } + + /* + This method will check specifically whether there is a virtual directory at this location. It must be known before + that there is no file present at the destination. + */ + boolean checkVirtualDirectoryExists() throws IOException { + DirectoryStatus dirStatus = this.checkDirStatus(false); + return dirStatus.equals(DirectoryStatus.NOT_EMPTY); // Virtual directories cannot be empty + } + + /** + * This method will check if a directory is extant and/or empty and accommodates virtual directories. This method + * will not check the status of root directories. + */ + DirectoryStatus checkDirStatus() throws IOException { + if (this.blobClient == null) { + throw LoggingUtility.logError(LOGGER, new IllegalArgumentException("The blob client was null.")); + } + + /* + * Do a get properties first on the directory name. This will determine if it is concrete&&exists or is either + * virtual or doesn't exist. + */ + BlobProperties props = null; + boolean exists = false; + try { + props = this.getBlobClient().getProperties(); + exists = true; + } catch (BlobStorageException e) { + if (e.getStatusCode() != 404) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + // Check if the resource is a file or directory before listing + if (exists && !props.getMetadata().containsKey(AzureResource.DIR_METADATA_MARKER)) { + return DirectoryStatus.NOT_A_DIRECTORY; + } + + return checkDirStatus(exists); + } + + /* + This method will determine the status of the directory given it is already known whether or not there is an object + at the target. + */ + DirectoryStatus checkDirStatus(boolean exists) throws IOException { + BlobContainerClient containerClient = this.getContainerClient(); + + // List on the directory name + '/' so that we only get things under the directory if any + ListBlobsOptions listOptions = new ListBlobsOptions().setMaxResultsPerPage(2) + .setPrefix(this.blobClient.getBlobName() + AzureFileSystem.PATH_SEPARATOR) + .setDetails(new BlobListDetails().setRetrieveMetadata(true)); + + /* + * If listing returns anything, then it is not empty. If listing returns nothing and exists() was true, then it's + * empty Else it does not exist + */ + try { + Iterator blobIterator = containerClient.listBlobsByHierarchy(AzureFileSystem.PATH_SEPARATOR, + listOptions, null).iterator(); + if (blobIterator.hasNext()) { + return DirectoryStatus.NOT_EMPTY; + } else if (exists) { + return DirectoryStatus.EMPTY; + } else { + return DirectoryStatus.DOES_NOT_EXIST; + } + } catch (BlobStorageException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + /** + * Creates the actual directory marker. This method should only be used when any necessary checks for proper + * conditions of directory creation (e.g. parent existence) have already been performed. Otherwise, + * {@link AzureFileSystemProvider#createDirectory(Path, FileAttribute[])} should be preferred. + * + * @param requestConditions Any necessary request conditions to pass when creating the directory blob. 
+ */ + void putDirectoryBlob(BlobRequestConditions requestConditions) { + this.blobClient.getBlockBlobClient().commitBlockListWithResponse(Collections.emptyList(), this.blobHeaders, + this.prepareMetadataForDirectory(), null, requestConditions, null, null); + } + + /* + Note that this will remove the properties from the list of attributes as it finds them. + */ + private void extractHttpHeaders(List> fileAttributes) { + BlobHttpHeaders headers = new BlobHttpHeaders(); + for (Iterator> it = fileAttributes.iterator(); it.hasNext();) { + FileAttribute attr = it.next(); + boolean propertyFound = true; + switch (attr.name()) { + case AzureFileSystemProvider.CONTENT_TYPE: + headers.setContentType(attr.value().toString()); + break; + case AzureFileSystemProvider.CONTENT_LANGUAGE: + headers.setContentLanguage(attr.value().toString()); + break; + case AzureFileSystemProvider.CONTENT_DISPOSITION: + headers.setContentDisposition(attr.value().toString()); + break; + case AzureFileSystemProvider.CONTENT_ENCODING: + headers.setContentEncoding(attr.value().toString()); + break; + case AzureFileSystemProvider.CONTENT_MD5: + if ((attr.value() instanceof byte[])) { + headers.setContentMd5((byte[]) attr.value()); + } else { + throw LoggingUtility.logError(LOGGER, + new UnsupportedOperationException("Content-MD5 attribute must be a byte[]")); + } + break; + case AzureFileSystemProvider.CACHE_CONTROL: + headers.setCacheControl(attr.value().toString()); + break; + default: + propertyFound = false; + break; + } + + if (propertyFound) { + it.remove(); + } + } + + this.blobHeaders = headers; + } + + /** + * Note this should only be used after the headers have been extracted. + * + * @param fileAttributes The attributes to convert to metadata. + */ + private void convertAttributesToMetadata(List> fileAttributes) { + Map metadata = new HashMap<>(); + for (FileAttribute attr : fileAttributes) { + metadata.put(attr.name(), attr.value().toString()); + } + + // If no attributes are set, return null so existing metadata is not cleared. + this.blobMetadata = metadata.isEmpty() ? null : metadata; + } + + private void validateNotRoot() { + if (this.path.isRoot()) { + throw LoggingUtility.logError(LOGGER, new IllegalArgumentException( + "Root directory not supported. Path: " + this.path)); + } + } + + private AzurePath validatePathInstanceType(Path path) { + if (!(path instanceof AzurePath)) { + throw LoggingUtility.logError(LOGGER, new IllegalArgumentException("This provider cannot operate on " + + "subtypes of Path other than AzurePath")); + } + return (AzurePath) path; + } + + BlobContainerClient getContainerClient() { + return new BlobContainerClientBuilder().endpoint(this.blobClient.getBlobUrl()) + .pipeline(this.blobClient.getHttpPipeline()) + .buildClient(); + } + + AzureResource setFileAttributes(List> attributes) { + attributes = new ArrayList<>(attributes); // To ensure removing header values from the list is supported. 
+ extractHttpHeaders(attributes); + convertAttributesToMetadata(attributes); + + return this; + } + + AzurePath getPath() { + return this.path; + } + + BlobClient getBlobClient() { + return this.blobClient; + } + + BlobOutputStream getBlobOutputStream(ParallelTransferOptions pto, BlobRequestConditions rq) { + BlockBlobOutputStreamOptions options = new BlockBlobOutputStreamOptions() + .setHeaders(this.blobHeaders) + .setMetadata(this.blobMetadata) + .setParallelTransferOptions(pto) + .setRequestConditions(rq); + return this.blobClient.getBlockBlobClient().getBlobOutputStream(options); + } + + private Map prepareMetadataForDirectory() { + if (this.blobMetadata == null) { + this.blobMetadata = new HashMap<>(); + } + this.blobMetadata.put(DIR_METADATA_MARKER, "true"); + return this.blobMetadata; + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureSeekableByteChannel.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureSeekableByteChannel.java new file mode 100644 index 00000000000..e51e727450b --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/AzureSeekableByteChannel.java @@ -0,0 +1,245 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import com.azure.core.util.logging.ClientLogger; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.NonReadableChannelException; +import java.nio.channels.NonWritableChannelException; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Path; + +/** + * A byte channel that maintains a current position. + *

+ * A channel may only be opened in read mode OR write mode. It may not be opened in read/write mode. Seeking is + * supported for reads, but not for writes. Modifications to existing files are not permitted--only creating new files or + * overwriting existing files. + *

+ * This type is not threadsafe to prevent having to hold locks across network calls. + */ +public final class AzureSeekableByteChannel implements SeekableByteChannel { + private static final ClientLogger LOGGER = new ClientLogger(AzureSeekableByteChannel.class); + + private final NioBlobInputStream reader; + private final NioBlobOutputStream writer; + private long position; + private boolean closed = false; + private final Path path; + /* + If this type needs to be made threadsafe, closed should be volatile. We need to add a lock to guard updates to + position or make it an atomicLong. If we have a lock, we have to be careful about holding while doing io ops and at + least ensure timeouts are set. We probably have to duplicate or copy the buffers for at least writing to ensure they + don't get overwritten. + */ + + AzureSeekableByteChannel(NioBlobInputStream inputStream, Path path) { + this.reader = inputStream; + /* + We mark at the beginning (we always construct a stream to the beginning of the blob) to support seeking. We can + effectively seek anywhere by always marking at the beginning of the blob and then a seek is resetting to that + mark and skipping. + */ + inputStream.mark(Integer.MAX_VALUE); + this.writer = null; + this.position = 0; + this.path = path; + } + + AzureSeekableByteChannel(NioBlobOutputStream outputStream, Path path) { + this.writer = outputStream; + this.reader = null; + this.position = 0; + this.path = path; + } + + @Override + public int read(ByteBuffer dst) throws IOException { + AzurePath.ensureFileSystemOpen(this.path); + validateOpen(); + validateReadMode(); + + // See comments in position(), remember that position is 0-based and size() is exclusive + if (this.position >= this.size()) { + return -1; // at or past EOF + } + + // If the buffer is backed by an array, we can write directly to that instead of allocating new memory. + int pos; + final int limit; + final byte[] buf; + if (dst.hasArray()) { + // ByteBuffer has a position and limit that define the bounds of the writeable area, and that + // area can be both smaller than the backing array and might not begin at array index 0. + pos = dst.position(); + limit = pos + dst.remaining(); + buf = dst.array(); + } else { + pos = 0; + limit = dst.remaining(); + buf = new byte[limit]; + } + + while (pos < limit) { + int byteCount = this.reader.read(buf, pos, limit - pos); + if (byteCount == -1) { + break; + } + pos += byteCount; + } + + /* + Either write to the destination if we had to buffer separately or just set the position correctly if we wrote + underneath the buffer + */ + int count; + if (dst.hasArray()) { + count = pos - dst.position(); + dst.position(pos); + } else { + count = pos; // original position was 0 + dst.put(buf, 0, count); + } + + this.position += count; + return count; + } + + @Override + public int write(ByteBuffer src) throws IOException { + AzurePath.ensureFileSystemOpen(this.path); + validateOpen(); + validateWriteMode(); + + final int length = src.remaining(); + this.position += length; + + /* + If the buffer is backed by an array, we can read directly from that instead of allocating new memory. + Set the position correctly if we read from underneath the buffer + */ + int pos; + byte[] buf; + if (src.hasArray()) { + // ByteBuffer has a position and limit that define the bounds of the readable area, and that + // area can be both smaller than the backing array and might not begin at array index 0. 
+ pos = src.position(); + buf = src.array(); + src.position(pos + length); + } else { + pos = 0; + buf = new byte[length]; + src.get(buf); // advances src.position() + } + // Either way, the src.position() and this.position have been updated before we know if this write + // will succeed. (Original behavior.) It may be better to update position(s) only *after* success, + // but then on IOException would we know if there was a partial write, and if so how much? + this.writer.write(buf, pos, length); + return length; + } + + @Override + public long position() throws IOException { + AzurePath.ensureFileSystemOpen(this.path); + validateOpen(); + + return this.position; + } + + @Override + public AzureSeekableByteChannel position(long newPosition) throws IOException { + AzurePath.ensureFileSystemOpen(this.path); + validateOpen(); + validateReadMode(); + + if (newPosition < 0) { + throw LoggingUtility.logError(LOGGER, new IllegalArgumentException("Seek position cannot be negative")); + } + + /* + The javadoc says seeking past the end for reading is legal and that it should indicate the end of the file on + the next read. StorageInputStream doesn't allow this, but we can get around that by modifying the + position variable and skipping the actual read (when read is called next); we'll check in read if we've seeked + past the end and short circuit there as well. + + Because we are in read mode this will always give us the size from properties. + */ + if (newPosition > this.size()) { + this.position = newPosition; + return this; + } + this.reader.reset(); // Because we always mark at the beginning, this will reset us back to the beginning. + this.reader.mark(Integer.MAX_VALUE); + long skipAmount = this.reader.skip(newPosition); + if (skipAmount < newPosition) { + throw new IOException("Could not set desired position"); + } + this.position = newPosition; + + return this; + } + + @Override + public long size() throws IOException { + AzurePath.ensureFileSystemOpen(this.path); + validateOpen(); + + /* + If we are in read mode, the size is the size of the file. + If we are in write mode, the size is the amount of data written so far. 
+ */ + if (reader != null) { + return reader.getBlobInputStream().getProperties().getBlobSize(); + } else { + return position; + } + } + + @Override + public AzureSeekableByteChannel truncate(long size) throws IOException { + throw LoggingUtility.logError(LOGGER, new UnsupportedOperationException()); + } + + @Override + public boolean isOpen() { + AzurePath.ensureFileSystemOpen(this.path); + return !this.closed; + } + + @Override + public void close() throws IOException { + AzurePath.ensureFileSystemOpen(this.path); + if (this.reader != null) { + this.reader.close(); + } else { + this.writer.close(); + } + this.closed = true; + } + + Path getPath() { + return this.path; + } + + private void validateOpen() throws ClosedChannelException { + if (this.closed) { + throw LoggingUtility.logError(LOGGER, new ClosedChannelException()); + } + } + + private void validateReadMode() { + if (this.reader == null) { + throw LoggingUtility.logError(LOGGER, new NonReadableChannelException()); + } + } + + private void validateWriteMode() { + if (this.writer == null) { + throw LoggingUtility.logError(LOGGER, new NonWritableChannelException()); + } + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/DirectoryStatus.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/DirectoryStatus.java new file mode 100644 index 00000000000..8356a7ebeb1 --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/DirectoryStatus.java @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +/** + * RESERVED FOR INTERNAL USE. + * + * An enum to indicate the status of a directory. + */ +enum DirectoryStatus { + EMPTY, // The directory at least weakly exists and is empty. + + NOT_EMPTY, // The directory at least weakly exists and has one or more children. + + DOES_NOT_EXIST, // There is no resource at this path. + + NOT_A_DIRECTORY; // A resource exists at this path, but it is not a directory. + + static boolean isDirectory(DirectoryStatus status) { + return EMPTY.equals(status) || NOT_EMPTY.equals(status); + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/LoggingUtility.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/LoggingUtility.java new file mode 100644 index 00000000000..3cd503f98c2 --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/LoggingUtility.java @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import com.azure.core.util.logging.ClientLogger; + +/** + * Only a minimal Utility class to get around a shortcoming in Core's logging. + */ +final class LoggingUtility { + public static T logError(ClientLogger logger, T e) { + logger.error(e.getMessage()); + return e; + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/NioBlobInputStream.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/NioBlobInputStream.java new file mode 100644 index 00000000000..676972dc93a --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/NioBlobInputStream.java @@ -0,0 +1,211 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
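To make the interaction between AzureResource.checkDirStatus() above and the DirectoryStatus enum concrete, here is a hedged sketch of how a caller might branch on the result. Both types are package-private, so this only works from inside com.azure.storage.blob.nio; 'somePath' is a placeholder AzurePath that is not a root, and the IOExceptions thrown by these calls are left unhandled.

    AzureResource resource = new AzureResource(somePath);   // validates the path is an AzurePath and not a root
    DirectoryStatus status = resource.checkDirStatus();     // one getProperties call plus, if needed, a short listing

    if (DirectoryStatus.isDirectory(status)) {
        // EMPTY or NOT_EMPTY: the directory at least weakly exists (possibly only as a virtual prefix).
    } else if (status == DirectoryStatus.NOT_A_DIRECTORY) {
        // A concrete blob without the directory metadata marker sits at this path.
    } else {
        // DirectoryStatus.DOES_NOT_EXIST: nothing is at this path.
    }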
+ +package com.azure.storage.blob.nio; + +import com.azure.core.util.logging.ClientLogger; +import com.azure.storage.blob.specialized.BlobInputStream; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; + +/** + * Provides an InputStream to read a file stored as an Azure Blob. + */ +public final class NioBlobInputStream extends InputStream { + private static final ClientLogger LOGGER = new ClientLogger(NioBlobInputStream.class); + + private final BlobInputStream blobInputStream; + private final Path path; + + NioBlobInputStream(BlobInputStream blobInputStream, Path path) { + this.blobInputStream = blobInputStream; + this.path = path; + } + + /** + * Returns an estimate of the number of bytes that can be read (or skipped over) from this input stream without + * blocking by the next invocation of a method for this input stream. The next invocation might be the same thread + * or another thread. A single read or skip of this many bytes will not block, but may read or skip fewer bytes. + * + * @return An int which represents an estimate of the number of bytes that can be read (or skipped + * over) from this input stream without blocking, or 0 when it reaches the end of the input stream. + */ + @Override + public synchronized int available() throws IOException { + AzurePath.ensureFileSystemOpen(path); + return this.blobInputStream.available(); + } + + /** + * Closes this input stream and releases any system resources associated with the stream. + */ + @Override + public synchronized void close() throws IOException { + AzurePath.ensureFileSystemOpen(path); + this.blobInputStream.close(); + } + + /** + * Marks the current position in this input stream. A subsequent call to the reset method repositions this stream at + * the last marked position so that subsequent reads re-read the same bytes. + * + * @param readlimit An int which represents the maximum limit of bytes that can be read before the mark + * position becomes invalid. + */ + @Override + public synchronized void mark(final int readlimit) { + this.blobInputStream.mark(readlimit); + } + + /** + * Tests if this input stream supports the mark and reset methods. + * + * @return Returns {@code true} + */ + @Override + public boolean markSupported() { + return this.blobInputStream.markSupported(); + } + + /** + * Reads the next byte of data from the input stream. The value byte is returned as an int in the range 0 to 255. If + * no byte is available because the end of the stream has been reached, the value -1 is returned. This method blocks + * until input data is available, the end of the stream is detected, or an exception is thrown. + * + * @return An int which represents the total number of bytes read into the buffer, or -1 if there is no + * more data because the end of the stream has been reached. + * @throws IOException If an I/O error occurs. + */ + @Override + public int read() throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + return this.blobInputStream.read(); + /* + BlobInputStream only throws RuntimeException, and it doesn't preserve the cause, it only takes the message, + so we can't do any better than re-wrapping it in an IOException. + */ + } catch (RuntimeException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + /** + * Reads some number of bytes from the input stream and stores them into the buffer array b. The number + * of bytes actually read is returned as an integer. 
This method blocks until input data is available, end of file + * is detected, or an exception is thrown. If the length of b is zero, then no bytes are read and 0 is + * returned; otherwise, there is an attempt to read at least one byte. If no byte is available because the stream is + * at the end of the file, the value -1 is returned; otherwise, at least one byte is read and stored into + * b. + * + * The first byte read is stored into element b[0], the next one into b[1], and so on. The + * number of bytes read is, at most, equal to the length of b. Let k be the number of + * bytes actually read; these bytes will be stored in elements b[0] through b[k-1], + * leaving elements b[k] through + * b[b.length-1] unaffected. + * + * The read(b) method for class {@link InputStream} has the same effect as: + * + * read(b, 0, b.length) + * + * @param b A byte array which represents the buffer into which the data is read. + * @throws IOException If the first byte cannot be read for any reason other than the end of the file, if the input + * stream has been closed, or if some other I/O error occurs. + * @throws NullPointerException If the byte array b is null. + */ + @Override + public int read(final byte[] b) throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + return this.blobInputStream.read(b); + } catch (RuntimeException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + /** + * Reads up to len bytes of data from the input stream into an array of bytes. An attempt is made to + * read as many as len bytes, but a smaller number may be read. The number of bytes actually read is + * returned as an integer. This method blocks until input data is available, end of file is detected, or an + * exception is thrown. + * + * If len is zero, then no bytes are read and 0 is returned; otherwise, there is an attempt to read at + * least one byte. If no byte is available because the stream is at end of file, the value -1 is returned; + * otherwise, at least one byte is read and stored into b. + * + * The first byte read is stored into element b[off], the next one into b[off+1], and so + * on. The number of bytes read is, at most, equal to len. Let k be the number of bytes + * actually read; these bytes will be stored in elements b[off] through b[off+k-1], + * leaving elements b[off+k] through + * b[off+len-1] unaffected. + * + * In every case, elements b[0] through b[off] and elements b[off+len] + * through b[b.length-1] are unaffected. + * + * @param b A byte array which represents the buffer into which the data is read. + * @param off An int which represents the start offset in the byte array at which the data + * is written. + * @param len An int which represents the maximum number of bytes to read. + * @return An int which represents the total number of bytes read into the buffer, or -1 if there is no + * more data because the end of the stream has been reached. + * @throws IOException If the first byte cannot be read for any reason other than end of file, or if the input + * stream has been closed, or if some other I/O error occurs. + * @throws NullPointerException If the byte array b is null. + * @throws IndexOutOfBoundsException If off is negative, len is negative, or + * len is greater than + * b.length - off. 
+ */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + AzurePath.ensureFileSystemOpen(path); + if (off < 0 || len < 0 || len > b.length - off) { + throw LOGGER.logExceptionAsError(new IndexOutOfBoundsException()); + } + try { + return this.blobInputStream.read(b, off, len); + } catch (RuntimeException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + /** + * Repositions this stream to the position at the time the mark method was last called on this input stream. Note + * repositioning the blob read stream will disable blob MD5 checking. + * + * @throws IOException If this stream has not been marked or if the mark has been invalidated. + */ + @Override + public synchronized void reset() throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + this.blobInputStream.reset(); + } catch (RuntimeException e) { + if (e.getMessage().equals("Stream mark expired.")) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + throw LoggingUtility.logError(LOGGER, e); + } + } + + /** + * Skips over and discards n bytes of data from this input stream. The skip method may, for a variety of reasons, + * end up skipping over some smaller number of bytes, possibly 0. This may result from any of a number of + * conditions; reaching end of file before n bytes have been skipped is only one possibility. The actual number of + * bytes skipped is returned. If n is negative, no bytes are skipped. + * + * Note repositioning the blob read stream will disable blob MD5 checking. + * + * @param n A long which represents the number of bytes to skip. + */ + @Override + public synchronized long skip(final long n) throws IOException { + AzurePath.ensureFileSystemOpen(path); + return this.blobInputStream.skip(n); + } + + BlobInputStream getBlobInputStream() { + return blobInputStream; + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/NioBlobOutputStream.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/NioBlobOutputStream.java new file mode 100644 index 00000000000..ae5c0fa02b1 --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/NioBlobOutputStream.java @@ -0,0 +1,99 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.nio; + +import com.azure.core.util.logging.ClientLogger; +import com.azure.storage.blob.specialized.BlobOutputStream; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Path; + +/** + * Provides an OutputStream to write to a file stored as an Azure Blob. + */ +public final class NioBlobOutputStream extends OutputStream { + private static final ClientLogger LOGGER = new ClientLogger(NioBlobOutputStream.class); + + private final BlobOutputStream blobOutputStream; + private final Path path; + + NioBlobOutputStream(BlobOutputStream blobOutputStream, Path path) { + this.blobOutputStream = blobOutputStream; + this.path = path; + } + + @Override + public synchronized void write(int i) throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + this.blobOutputStream.write(i); + /* + BlobOutputStream only throws RuntimeException, and it doesn't preserve the cause, it only takes the message, + so we can't do any better than re-wrapping it in an IOException. 
+ */ + } catch (RuntimeException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + @Override + public synchronized void write(byte[] b) throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + this.blobOutputStream.write(b); + /* + BlobOutputStream only throws RuntimeException, and it doesn't preserve the cause, it only takes the message, + so we can't do any better than re-wrapping it in an IOException. + */ + } catch (RuntimeException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + @Override + public synchronized void write(byte[] b, int off, int len) throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + this.blobOutputStream.write(b, off, len); + /* + BlobOutputStream only throws RuntimeException, and it doesn't preserve the cause, it only takes the message, + so we can't do any better than re-wrapping it in an IOException. + */ + } catch (RuntimeException e) { + if (e instanceof IndexOutOfBoundsException) { + throw LoggingUtility.logError(LOGGER, e); + } + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + @Override + public synchronized void flush() throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + this.blobOutputStream.flush(); + /* + BlobOutputStream only throws RuntimeException, and it doesn't preserve the cause, it only takes the message, + so we can't do any better than re-wrapping it in an IOException. + */ + } catch (RuntimeException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } + + @Override + public synchronized void close() throws IOException { + AzurePath.ensureFileSystemOpen(path); + try { + this.blobOutputStream.close(); + /* + BlobOutputStream only throws RuntimeException, and it doesn't preserve the cause, it only takes the message, + so we can't do any better than re-wrapping it in an IOException. + */ + } catch (RuntimeException e) { + throw LoggingUtility.logError(LOGGER, new IOException(e)); + } + } +} diff --git a/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/package-info.java b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/package-info.java new file mode 100644 index 00000000000..96cd1fbd627 --- /dev/null +++ b/azure-blob-nio/src/main/java/com/azure/storage/blob/nio/package-info.java @@ -0,0 +1,7 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +/** + * Package containing the classes for loading the AzureFileSystemProvider based on Azure Storage Blobs. 
+ */ +package com.azure.storage.blob.nio; diff --git a/azure-blob-nio/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider b/azure-blob-nio/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider new file mode 100644 index 00000000000..5cc2b4ead14 --- /dev/null +++ b/azure-blob-nio/src/main/resources/META-INF/services/java.nio.file.spi.FileSystemProvider @@ -0,0 +1 @@ +com.azure.storage.blob.nio.AzureFileSystemProvider diff --git a/azure-blob-nio/src/main/resources/azure-storage-blob-nio.properties b/azure-blob-nio/src/main/resources/azure-storage-blob-nio.properties new file mode 100644 index 00000000000..ca812989b4f --- /dev/null +++ b/azure-blob-nio/src/main/resources/azure-storage-blob-nio.properties @@ -0,0 +1,2 @@ +name=${project.artifactId} +version=${project.version} diff --git a/azure-blob-nio/src/samples/java/com/azure/storage/blob/nio/ReadmeSamples.java b/azure-blob-nio/src/samples/java/com/azure/storage/blob/nio/ReadmeSamples.java new file mode 100644 index 00000000000..6c8c5e06e0b --- /dev/null +++ b/azure-blob-nio/src/samples/java/com/azure/storage/blob/nio/ReadmeSamples.java @@ -0,0 +1,129 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +package com.azure.storage.blob.nio; + +import com.azure.storage.blob.models.BlobHttpHeaders; +import com.azure.storage.common.StorageSharedKeyCredential; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * WARNING: MODIFYING THIS FILE WILL REQUIRE CORRESPONDING UPDATES TO README.md FILE. LINE NUMBERS + * ARE USED TO EXTRACT APPROPRIATE CODE SEGMENTS FROM THIS FILE. ADD NEW CODE AT THE BOTTOM TO AVOID CHANGING + * LINE NUMBERS OF EXISTING CODE SAMPLES. 
+ * + * Code samples for the README.md + */ +public class ReadmeSamples { + + private static final String CONTAINER_STORES = "container1,container2"; // A comma separated list of container names + private static final StorageSharedKeyCredential SHARE_KEY_CREDENTIAL + = new StorageSharedKeyCredential("", ""); + private static final Map CONFIG = new HashMap() { + { + put(AzureFileSystem.AZURE_STORAGE_SHARED_KEY_CREDENTIAL, SHARE_KEY_CREDENTIAL); + put(AzureFileSystem.AZURE_STORAGE_FILE_STORES, CONTAINER_STORES); + } + }; + private FileSystem myFs = FileSystems.newFileSystem(new URI("azb://?endpoint= config = new HashMap<>(); + String stores = ","; // A comma separated list of container names + StorageSharedKeyCredential credential = new StorageSharedKeyCredential(" attributes = Files.readAttributes(filePath, "azureBlob:metadata,headers"); + // END: readme-sample-readAttributesOnAFileString + } + + public void writeAttributesToAFile() throws IOException { + // BEGIN: readme-sample-writeAttributesToAFile + AzureBlobFileAttributeView view = Files.getFileAttributeView(filePath, AzureBlobFileAttributeView.class); + view.setMetadata(Collections.emptyMap()); + // END: readme-sample-writeAttributesToAFile + } + + public void writeAttributesToAFileString() throws IOException { + // BEGIN: readme-sample-writeAttributesToAFileString + Files.setAttribute(filePath, "azureBlob:blobHttpHeaders", new BlobHttpHeaders()); + // END: readme-sample-writeAttributesToAFileString + } +} diff --git a/backend/src/main/scala/cromwell/backend/io/GlobFunctions.scala b/backend/src/main/scala/cromwell/backend/io/GlobFunctions.scala index 6f0ae823bd8..680c073bf45 100644 --- a/backend/src/main/scala/cromwell/backend/io/GlobFunctions.scala +++ b/backend/src/main/scala/cromwell/backend/io/GlobFunctions.scala @@ -38,7 +38,7 @@ trait GlobFunctions extends IoFunctionSet with AsyncIoFunctions { import GlobFunctions._ val globPatternName = globName(pattern) val listFilePath = callContext.root.resolve(s"${globName(pattern)}.list") - asyncIo.readLinesAsync(listFilePath.toRealPath()) map { lines => + asyncIo.readLinesAsync(listFilePath.getSymlinkSafePath()) map { lines => lines.toList map { fileName => (callContext.root / globPatternName / fileName).pathAsString } diff --git a/build.sbt b/build.sbt index 76f6f27e2c9..2c9a8068992 100644 --- a/build.sbt +++ b/build.sbt @@ -37,6 +37,11 @@ lazy val wdlModelBiscayne = (project in wdlModelRoot / "biscayne") .dependsOn(wdlModelDraft3) .dependsOn(common % "test->test") +lazy val wdlModelCascades = (project in wdlModelRoot / "cascades") + .withLibrarySettings("cromwell-wdl-model-cascades") + .dependsOn(wdlSharedModel) + .dependsOn(common % "test->test") + lazy val wdlTransformsRoot = wdlRoot / "transforms" lazy val wdlSharedTransforms = (project in wdlTransformsRoot / "shared") @@ -74,6 +79,13 @@ lazy val wdlTransformsBiscayne = (project in wdlTransformsRoot / "biscayne") .dependsOn(common % "test->test") .dependsOn(wom % "test->test") +lazy val wdlTransformsCascades = (project in wdlTransformsRoot / "cascades") + .withLibrarySettings("cromwell-wdl-transforms-cascades", wdlDependencies) + .dependsOn(wdlNewBaseTransforms) + .dependsOn(languageFactoryCore) + .dependsOn(common % "test->test") + .dependsOn(wom % "test->test") + lazy val core = project .withLibrarySettings("cromwell-core", coreDependencies) .dependsOn(wom) @@ -85,11 +97,17 @@ lazy val cloudSupport = project .dependsOn(common) .dependsOn(common % "test->test") +lazy val azureBlobNio = (project in file("azure-blob-nio")) + 
.withLibrarySettings("cromwell-azure-blobNio", azureBlobNioDependencies) + lazy val azureBlobFileSystem = (project in file("filesystems/blob")) .withLibrarySettings("cromwell-azure-blobFileSystem", blobFileSystemDependencies) .dependsOn(core) + .dependsOn(cloudSupport) + .dependsOn(azureBlobNio) .dependsOn(core % "test->test") .dependsOn(common % "test->test") + .dependsOn(azureBlobNio % "test->test") lazy val awsS3FileSystem = (project in file("filesystems/s3")) .withLibrarySettings("cromwell-aws-s3filesystem", s3FileSystemDependencies) @@ -148,6 +166,7 @@ lazy val databaseMigration = (project in file("database/migration")) lazy val dockerHashing = project .withLibrarySettings("cromwell-docker-hashing", dockerHashingDependencies) + .dependsOn(cloudSupport) .dependsOn(core) .dependsOn(core % "test->test") .dependsOn(common % "test->test") @@ -177,6 +196,7 @@ lazy val services = project .dependsOn(wdlDraft2LanguageFactory % "test->test") // because the WaaS tests init language config with all languages .dependsOn(wdlDraft3LanguageFactory % "test->test") .dependsOn(wdlBiscayneLanguageFactory % "test->test") + .dependsOn(wdlCascadesLanguageFactory % "test->test") .dependsOn(core % "test->test") .dependsOn(ftpFileSystem % "test->test") .dependsOn(common % "test->test") @@ -215,6 +235,19 @@ lazy val googlePipelinesV2Beta = (project in backendRoot / "google" / "pipelines .dependsOn(core % "test->test") .dependsOn(common % "test->test") +lazy val googleBatch = (project in backendRoot / "google" / "batch") + .withLibrarySettings("cromwell-google-batch-backend") + .dependsOn(backend) + .dependsOn(gcsFileSystem) + .dependsOn(drsFileSystem) + .dependsOn(sraFileSystem) + .dependsOn(httpFileSystem) + .dependsOn(backend % "test->test") + .dependsOn(gcsFileSystem % "test->test") + .dependsOn(services % "test->test") + .dependsOn(common % "test->test") + .dependsOn(core % "test->test") + lazy val awsBackend = (project in backendRoot / "aws") .withLibrarySettings("cromwell-aws-backend") .dependsOn(backend) @@ -249,6 +282,7 @@ lazy val engine = project .dependsOn(backend) .dependsOn(gcsFileSystem) .dependsOn(drsFileSystem) + .dependsOn(httpFileSystem) .dependsOn(sraFileSystem) .dependsOn(awsS3FileSystem) .dependsOn(azureBlobFileSystem) @@ -262,6 +296,7 @@ lazy val engine = project .dependsOn(wdlDraft2LanguageFactory % "test->test") .dependsOn(wdlDraft3LanguageFactory % "test->test") .dependsOn(wdlBiscayneLanguageFactory % "test->test") + .dependsOn(wdlCascadesLanguageFactory % "test->test") .dependsOn(common % "test->test") .dependsOn(core % "test->test") .dependsOn(backend % "test->test") @@ -278,6 +313,7 @@ lazy val womtool = project .dependsOn(wdlDraft2LanguageFactory) .dependsOn(wdlDraft3LanguageFactory) .dependsOn(wdlBiscayneLanguageFactory) + .dependsOn(wdlCascadesLanguageFactory) .dependsOn(wom % "test->test") .dependsOn(common % "test->test") @@ -323,6 +359,13 @@ lazy val wdlBiscayneLanguageFactory = (project in languageFactoryRoot / "wdl-bis .dependsOn(wdlTransformsBiscayne) .dependsOn(common % "test->test") +lazy val wdlCascadesLanguageFactory = (project in languageFactoryRoot / "wdl-cascades") + .withLibrarySettings("wdl-cascades") + .dependsOn(languageFactoryCore) + .dependsOn(wdlModelCascades) + .dependsOn(wdlTransformsCascades) + .dependsOn(common % "test->test") + lazy val `cloud-nio-spi` = (project in cloudNio / "cloud-nio-spi") .withLibrarySettings(libraryName = "cloud-nio-spi", dependencies = spiDependencies) .dependsOn(common % "test->test") @@ -354,17 +397,24 @@ lazy val 
`cromwell-drs-localizer` = project .dependsOn(common) .dependsOn(`cloud-nio-impl-drs` % "test->test") +lazy val pact4s = project.in(file("pact4s")) + .settings(pact4sSettings) + .dependsOn(services) + .disablePlugins(sbtassembly.AssemblyPlugin) + lazy val server = project .withExecutableSettings("cromwell", serverDependencies) .dependsOn(engine) .dependsOn(googlePipelinesV2Alpha1) .dependsOn(googlePipelinesV2Beta) + .dependsOn(googleBatch) .dependsOn(awsBackend) .dependsOn(tesBackend) .dependsOn(cromwellApiClient) .dependsOn(wdlDraft2LanguageFactory) .dependsOn(wdlDraft3LanguageFactory) .dependsOn(wdlBiscayneLanguageFactory) + .dependsOn(wdlCascadesLanguageFactory) .dependsOn(engine % "test->test") .dependsOn(common % "test->test") @@ -378,6 +428,7 @@ lazy val root = (project in file(".")) .aggregate(`cromwell-drs-localizer`) .aggregate(awsBackend) .aggregate(awsS3FileSystem) + .aggregate(azureBlobNio) .aggregate(azureBlobFileSystem) .aggregate(backend) .aggregate(centaur) @@ -396,6 +447,7 @@ lazy val root = (project in file(".")) .aggregate(googlePipelinesCommon) .aggregate(googlePipelinesV2Alpha1) .aggregate(googlePipelinesV2Beta) + .aggregate(googleBatch) .aggregate(httpFileSystem) .aggregate(languageFactoryCore) .aggregate(perf) @@ -405,18 +457,22 @@ lazy val root = (project in file(".")) .aggregate(sraFileSystem) .aggregate(tesBackend) .aggregate(wdlBiscayneLanguageFactory) + .aggregate(wdlCascadesLanguageFactory) .aggregate(wdlDraft2LanguageFactory) .aggregate(wdlDraft3LanguageFactory) .aggregate(wdlModelBiscayne) + .aggregate(wdlModelCascades) .aggregate(wdlModelDraft2) .aggregate(wdlModelDraft3) .aggregate(wdlNewBaseTransforms) .aggregate(wdlSharedModel) .aggregate(wdlSharedTransforms) .aggregate(wdlTransformsBiscayne) + .aggregate(wdlTransformsCascades) .aggregate(wdlTransformsDraft2) .aggregate(wdlTransformsDraft3) .aggregate(wes2cromwell) .aggregate(wom) .aggregate(womtool) + .aggregate(pact4s) .withAggregateSettings() diff --git a/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read.test b/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read.test new file mode 100644 index 00000000000..b7270ccfb11 --- /dev/null +++ b/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read.test @@ -0,0 +1,24 @@ +name: azure_blob_storage_read +testFormat: workflowsuccess +backends: [Local] +tags: ["blob", "azure"] +retryTestFailures: false + +files { + workflow: azure_blob_storage_read/azure_blob_storage_read.wdl + inputs: azure_blob_storage_read/azure_blob_storage_read.inputs + options: azure_blob_storage_read/azure_blob_storage_read.options +} + +metadata { + status: Succeeded + "outputs.azure_blob_storage_read.s1": "This is my test file! Did it work??" +} + +# az:// is the root of the container specified in reference.conf. +# Here, we verify that exactly one log was written. 
+ +fileSystemCheck: "blob" +outputExpectations: { + "az://test-cromwell-workflow-logs/workflow.<>.log" : 1 +} diff --git a/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read/azure_blob_storage_read.inputs b/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read/azure_blob_storage_read.inputs new file mode 100644 index 00000000000..c81e166493f --- /dev/null +++ b/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read/azure_blob_storage_read.inputs @@ -0,0 +1,3 @@ +{ + "azure_blob_storage_read.file1": "https://centaurtesting.blob.core.windows.net/test-blob/testRead.txt" +} diff --git a/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read/azure_blob_storage_read.options b/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read/azure_blob_storage_read.options new file mode 100644 index 00000000000..8d68fcdd6bf --- /dev/null +++ b/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read/azure_blob_storage_read.options @@ -0,0 +1,3 @@ +{ + "final_workflow_log_dir": "https://centaurtesting.blob.core.windows.net/test-blob/test-cromwell-workflow-logs" +} diff --git a/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read/azure_blob_storage_read.wdl b/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read/azure_blob_storage_read.wdl new file mode 100644 index 00000000000..d19d417cdd5 --- /dev/null +++ b/centaur/src/main/resources/azureBlobTestCases/azure_blob_storage_read/azure_blob_storage_read.wdl @@ -0,0 +1,12 @@ +version 1.0 + +workflow azure_blob_storage_read { + + input { + File file1 + } + + output { + String s1 = read_string(file1) + } +} diff --git a/centaur/src/main/resources/reference.conf b/centaur/src/main/resources/reference.conf index a0428e01235..d570b5beef3 100644 --- a/centaur/src/main/resources/reference.conf +++ b/centaur/src/main/resources/reference.conf @@ -86,5 +86,11 @@ centaur { include "centaur_aws_credentials.conf" } + azure { + container: "test-blob" + endpoint: "https://centaurtesting.blob.core.windows.net" + subscription: "62b22893-6bc1-46d9-8a90-806bb3cce3c9" + } + log-request-failures = false } diff --git a/centaur/src/main/resources/standardTestCases/biscayne_prohibits_directory.test b/centaur/src/main/resources/standardTestCases/biscayne_prohibits_directory.test new file mode 100644 index 00000000000..d2d9bfc9ddf --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/biscayne_prohibits_directory.test @@ -0,0 +1,13 @@ +name: directory_type_local_denied +testFormat: workflowfailure +tags: [localdockertest, "wdl_biscayne"] +backends: [Local, LocalNoDocker] + +files { + workflow: wdl_biscayne/biscayne_prohibits_directory/directory_type.wdl + inputs: wdl_biscayne/biscayne_prohibits_directory/directory_type_local_inputs.json +} + +metadata { + status: Failed +} diff --git a/centaur/src/main/resources/standardTestCases/directory_type_local.test b/centaur/src/main/resources/standardTestCases/directory_type_local.test index f2db3398d96..ae0f90dcc72 100644 --- a/centaur/src/main/resources/standardTestCases/directory_type_local.test +++ b/centaur/src/main/resources/standardTestCases/directory_type_local.test @@ -1,11 +1,11 @@ name: directory_type_local testFormat: workflowsuccess -tags: [localdockertest, "wdl_biscayne"] +tags: [localdockertest, "wdl_cascades"] backends: [Local, LocalNoDocker] files { - workflow: wdl_biscayne/directory_type/directory_type.wdl - inputs: wdl_biscayne/directory_type/directory_type_local_inputs.json + workflow: 
wdl_cascades/directory_type/directory_type.wdl + inputs: wdl_cascades/directory_type/directory_type_local_inputs.json } metadata { diff --git a/centaur/src/main/resources/standardTestCases/directory_type_output_papi.test b/centaur/src/main/resources/standardTestCases/directory_type_output_papi.test index f66dfd656b3..67e435db619 100644 --- a/centaur/src/main/resources/standardTestCases/directory_type_output_papi.test +++ b/centaur/src/main/resources/standardTestCases/directory_type_output_papi.test @@ -1,10 +1,10 @@ name: directory_type_output_papi testFormat: workflowsuccess -tags: ["wdl_biscayne"] +tags: ["wdl_cascades"] backends: [Papi] files { - workflow: wdl_biscayne/directory_type_output/directory_type_output.wdl + workflow: wdl_cascades/directory_type_output/directory_type_output.wdl } metadata { diff --git a/centaur/src/main/resources/standardTestCases/directory_type_papi.test b/centaur/src/main/resources/standardTestCases/directory_type_papi.test index 988a609d237..4d953e03c4e 100644 --- a/centaur/src/main/resources/standardTestCases/directory_type_papi.test +++ b/centaur/src/main/resources/standardTestCases/directory_type_papi.test @@ -1,10 +1,10 @@ name: directory_type_papi testFormat: workflowsuccess -tags: ["wdl_biscayne"] +tags: ["wdl_cascades"] backends: [Papi] files { - workflow: wdl_biscayne/directory_type/directory_type.wdl + workflow: wdl_cascades/directory_type/directory_type.wdl } metadata { diff --git a/centaur/src/main/resources/standardTestCases/draft3_read_file_limits.test b/centaur/src/main/resources/standardTestCases/draft3_read_file_limits.test index 4a9af0c8813..4bcbdd38db7 100644 --- a/centaur/src/main/resources/standardTestCases/draft3_read_file_limits.test +++ b/centaur/src/main/resources/standardTestCases/draft3_read_file_limits.test @@ -2,6 +2,7 @@ name: draft3_read_file_limits testFormat: workflowfailure workflowType: WDL workflowTypeVersion: 1.0 +tags: [batchexclude] files { workflow: wdl_draft3/read_file_limits/read_file_limits.wdl diff --git a/centaur/src/main/resources/standardTestCases/drs_tests/drs_usa_jdr.wdl b/centaur/src/main/resources/standardTestCases/drs_tests/drs_usa_jdr.wdl index ba2a17f292d..e9b56af98d2 100644 --- a/centaur/src/main/resources/standardTestCases/drs_tests/drs_usa_jdr.wdl +++ b/centaur/src/main/resources/standardTestCases/drs_tests/drs_usa_jdr.wdl @@ -61,7 +61,7 @@ task localize_jdr_drs_with_usa { } runtime { - docker: "ubuntu" + docker: "ubuntu:latest" backend: "papi-v2-usa" } } @@ -88,7 +88,7 @@ task skip_localize_jdr_drs_with_usa { } runtime { - docker: "ubuntu" + docker: "ubuntu:latest" backend: "papi-v2-usa" } } @@ -109,7 +109,7 @@ task read_drs_with_usa { } runtime { - docker: "ubuntu" + docker: "ubuntu:latest" backend: "papi-v2-usa" } } diff --git a/centaur/src/main/resources/standardTestCases/invalidate_bad_caches_use_good_local.test b/centaur/src/main/resources/standardTestCases/invalidate_bad_caches_use_good_local.test index 2d3bc8a4e31..4249e041c47 100644 --- a/centaur/src/main/resources/standardTestCases/invalidate_bad_caches_use_good_local.test +++ b/centaur/src/main/resources/standardTestCases/invalidate_bad_caches_use_good_local.test @@ -3,6 +3,10 @@ testFormat: workflowsuccess backends: [Local] tags: [localdockertest] +# This test stopped working 8/23 but its cloud equivalent that we care about is fine [0] +# [0] `invalidate_bad_caches_use_good_jes.test` +ignore: true + files { workflow: invalidate_bad_caches/invalidate_bad_caches_use_good.wdl inputs: invalidate_bad_caches/local.inputs diff --git 
a/centaur/src/main/resources/standardTestCases/long_cmd.test b/centaur/src/main/resources/standardTestCases/long_cmd.test index 40b6110b629..cef5fda2177 100644 --- a/centaur/src/main/resources/standardTestCases/long_cmd.test +++ b/centaur/src/main/resources/standardTestCases/long_cmd.test @@ -9,6 +9,7 @@ name: long_cmd testFormat: workflowsuccess +tags: [batchexclude] files { workflow: long_cmd/long_cmd.wdl diff --git a/centaur/src/main/resources/standardTestCases/read_file_limits.test b/centaur/src/main/resources/standardTestCases/read_file_limits.test index 0079401812a..734ab809b92 100644 --- a/centaur/src/main/resources/standardTestCases/read_file_limits.test +++ b/centaur/src/main/resources/standardTestCases/read_file_limits.test @@ -1,5 +1,6 @@ name: read_file_limits testFormat: workflowfailure +tags: [batchexclude] files { workflow: read_file_limits/read_file_limits.wdl diff --git a/centaur/src/main/resources/standardTestCases/relative_output_paths_colliding.test b/centaur/src/main/resources/standardTestCases/relative_output_paths_colliding.test index 82b01c6399d..2a6fca6793e 100644 --- a/centaur/src/main/resources/standardTestCases/relative_output_paths_colliding.test +++ b/centaur/src/main/resources/standardTestCases/relative_output_paths_colliding.test @@ -1,5 +1,6 @@ name: relative_output_paths_colliding testFormat: workflowfailure +tags: [batchexclude] files { workflow: relative_output_paths_colliding/workflow_output_paths_colliding.wdl diff --git a/centaur/src/main/resources/standardTestCases/standard_output_paths_colliding_prevented.test b/centaur/src/main/resources/standardTestCases/standard_output_paths_colliding_prevented.test index 6c5a5b51476..d8d37b4b2d0 100644 --- a/centaur/src/main/resources/standardTestCases/standard_output_paths_colliding_prevented.test +++ b/centaur/src/main/resources/standardTestCases/standard_output_paths_colliding_prevented.test @@ -1,5 +1,6 @@ name: standard_output_paths_colliding_prevented testFormat: workflowsuccess +tags: [batchexclude] files { workflow: standard_output_paths_colliding_prevented/workflow_output_paths_colliding.wdl diff --git a/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_http_relative_imports/biscayne_http_relative_imports.wdl b/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_http_relative_imports/biscayne_http_relative_imports.wdl index c75ffacc8a8..dbe4fdee0f8 100644 --- a/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_http_relative_imports/biscayne_http_relative_imports.wdl +++ b/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_http_relative_imports/biscayne_http_relative_imports.wdl @@ -1,4 +1,4 @@ -version development +version development-1.1 # Is there a better way to test this? 
import "https://raw.githubusercontent.com/broadinstitute/cromwell/develop/womtool/src/test/resources/validate/biscayne/valid/relative_imports/sub_wfs/foo.wdl" diff --git a/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_new_engine_functions/biscayne_new_engine_functions.wdl b/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_new_engine_functions/biscayne_new_engine_functions.wdl index 979ef6e998a..220fcb535e5 100644 --- a/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_new_engine_functions/biscayne_new_engine_functions.wdl +++ b/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_new_engine_functions/biscayne_new_engine_functions.wdl @@ -1,4 +1,4 @@ -version development +version development-1.1 workflow biscayne_new_engine_functions { diff --git a/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_prohibits_directory/directory_type.wdl b/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_prohibits_directory/directory_type.wdl new file mode 100644 index 00000000000..7bd7f5583c3 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_prohibits_directory/directory_type.wdl @@ -0,0 +1,49 @@ +version development-1.1 + +workflow directory_type { + input { + String text2loc = "text2" + } + call make_directory { input: text2loc = text2loc } + call read_from_directory { input: d = make_directory.d, text2loc = text2loc } + + output { + Array[String] out = read_from_directory.contents + } +} + +task make_directory { + input { + String text2loc + } + String text2dir = sub("foo/~{text2loc}", "/[^/]*$", "") + command { + mkdir foo + mkdir -p ~{text2dir} + echo "foo text" > foo/text + echo "foo text2" > foo/~{text2loc} + } + runtime { + docker: "ubuntu:latest" + } + output { + Directory d = "foo/" + } +} + +task read_from_directory { + input { + String text2loc + Directory d + } + command { + cat ~{d}/text + cat ~{d}/~{text2loc} + } + runtime { + docker: "ubuntu:latest" + } + output { + Array[String] contents = read_lines(stdout()) + } +} diff --git a/centaur/src/main/resources/standardTestCases/wdl_biscayne/directory_type/directory_type_local_inputs.json b/centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_prohibits_directory/directory_type_local_inputs.json similarity index 100% rename from centaur/src/main/resources/standardTestCases/wdl_biscayne/directory_type/directory_type_local_inputs.json rename to centaur/src/main/resources/standardTestCases/wdl_biscayne/biscayne_prohibits_directory/directory_type_local_inputs.json diff --git a/centaur/src/main/resources/standardTestCases/wdl_biscayne/default_default/default_default.wdl b/centaur/src/main/resources/standardTestCases/wdl_biscayne/default_default/default_default.wdl index 20958128db5..a73b3736fdf 100644 --- a/centaur/src/main/resources/standardTestCases/wdl_biscayne/default_default/default_default.wdl +++ b/centaur/src/main/resources/standardTestCases/wdl_biscayne/default_default/default_default.wdl @@ -1,4 +1,4 @@ -version development +version development-1.1 workflow default_default { call default_default_task diff --git a/centaur/src/main/resources/standardTestCases/wdl_biscayne/read_functions_windows_line_endings/read_functions_windows_line_endings.wdl b/centaur/src/main/resources/standardTestCases/wdl_biscayne/read_functions_windows_line_endings/read_functions_windows_line_endings.wdl index 033bdfc4d97..64b792fd5dd 100644 --- 
a/centaur/src/main/resources/standardTestCases/wdl_biscayne/read_functions_windows_line_endings/read_functions_windows_line_endings.wdl +++ b/centaur/src/main/resources/standardTestCases/wdl_biscayne/read_functions_windows_line_endings/read_functions_windows_line_endings.wdl @@ -1,4 +1,4 @@ -version development +version development-1.1 struct JsonObj { String field1 diff --git a/centaur/src/main/resources/standardTestCases/wdl_biscayne/directory_type/directory_type.wdl b/centaur/src/main/resources/standardTestCases/wdl_cascades/directory_type/directory_type.wdl similarity index 100% rename from centaur/src/main/resources/standardTestCases/wdl_biscayne/directory_type/directory_type.wdl rename to centaur/src/main/resources/standardTestCases/wdl_cascades/directory_type/directory_type.wdl diff --git a/centaur/src/main/resources/standardTestCases/wdl_cascades/directory_type/directory_type_local_inputs.json b/centaur/src/main/resources/standardTestCases/wdl_cascades/directory_type/directory_type_local_inputs.json new file mode 100644 index 00000000000..cfe1a8bb688 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/wdl_cascades/directory_type/directory_type_local_inputs.json @@ -0,0 +1,3 @@ +{ + "directory_type.text2loc": "bar/text2" +} diff --git a/centaur/src/main/resources/standardTestCases/wdl_biscayne/directory_type_output/directory_type_output.wdl b/centaur/src/main/resources/standardTestCases/wdl_cascades/directory_type_output/directory_type_output.wdl similarity index 100% rename from centaur/src/main/resources/standardTestCases/wdl_biscayne/directory_type_output/directory_type_output.wdl rename to centaur/src/main/resources/standardTestCases/wdl_cascades/directory_type_output/directory_type_output.wdl diff --git a/centaur/src/main/scala/centaur/test/FilesChecker.scala b/centaur/src/main/scala/centaur/test/FilesChecker.scala index 240552bb755..5f3f1d9e170 100644 --- a/centaur/src/main/scala/centaur/test/FilesChecker.scala +++ b/centaur/src/main/scala/centaur/test/FilesChecker.scala @@ -41,3 +41,14 @@ object AWSFilesChecker extends FilesChecker { override def countObjectsAtPath: String => Int = s3Client.countObjects(s3PrefixRegex) } + +object BlobFilesChecker extends FilesChecker { + import ObjectCounterInstances.blobObjectCounter + import ObjectCounterSyntax._ + + private lazy val containerClient = Operations.blobContainerClient + + // The root of the endpoint + container specified in reference.conf will be substituted for az:// + private val azurePrefixRange = "^az:\\/\\/.*" + override def countObjectsAtPath: String => Int = ObjectCounterSyntax(containerClient).countObjects(azurePrefixRange) +} diff --git a/centaur/src/main/scala/centaur/test/ObjectCounter.scala b/centaur/src/main/scala/centaur/test/ObjectCounter.scala index 46affc7d552..124f78e1dc8 100644 --- a/centaur/src/main/scala/centaur/test/ObjectCounter.scala +++ b/centaur/src/main/scala/centaur/test/ObjectCounter.scala @@ -1,5 +1,6 @@ package centaur.test +import com.azure.storage.blob.BlobContainerClient import com.google.cloud.storage.Storage.BlobListOption import com.google.cloud.storage.{Blob, Storage} import software.amazon.awssdk.services.s3.S3Client @@ -38,6 +39,25 @@ object ObjectCounterInstances { storage.list(g.bucket, BlobListOption.prefix(g.directory)).iterateAll.asScala listObjectsAtPath(_).size } + + implicit val blobObjectCounter: ObjectCounter[BlobContainerClient] = (containerClient : BlobContainerClient) => { + val pathToInt: Path => Int = providedPath => { + //Our path parsing is somewhat GCP 
centric. Convert to a blob path starting from the container root. + def pathToBlobPath(parsedPath : Path) : String = { + (Option(parsedPath.bucket), Option(parsedPath.directory)) match { + case (None, _) => "" + case (Some(_), None) => parsedPath.bucket + case (Some(_), Some(_)) => parsedPath.bucket + "/" + parsedPath.directory + } + } + + val fullPath = pathToBlobPath(providedPath) + val blobsInFolder = containerClient.listBlobsByHierarchy(fullPath) + //if something "isPrefix", it's a directory. Otherwise, it's a file. We just want to count files. + blobsInFolder.asScala.count(!_.isPrefix) + } + pathToInt(_) + } } object ObjectCounterSyntax { diff --git a/centaur/src/main/scala/centaur/test/Test.scala b/centaur/src/main/scala/centaur/test/Test.scala index 470de959381..d0a56a2cbf5 100644 --- a/centaur/src/main/scala/centaur/test/Test.scala +++ b/centaur/src/main/scala/centaur/test/Test.scala @@ -10,6 +10,7 @@ import centaur.test.metadata.WorkflowFlatMetadata import centaur.test.metadata.WorkflowFlatMetadata._ import centaur.test.submit.SubmitHttpResponse import centaur.test.workflow.Workflow +import com.azure.storage.blob.BlobContainerClient import com.google.api.services.genomics.v2alpha1.{Genomics, GenomicsScopes} import com.google.api.services.storage.StorageScopes import com.google.auth.Credentials @@ -23,6 +24,7 @@ import configs.syntax._ import cromwell.api.CromwellClient.UnsuccessfulRequestException import cromwell.api.model.{CallCacheDiff, Failed, HashDifference, SubmittedWorkflow, Succeeded, TerminalStatus, WaasDescription, WorkflowId, WorkflowMetadata, WorkflowStatus} import cromwell.cloudsupport.aws.AwsConfiguration +import cromwell.cloudsupport.azure.{AzureUtils} import cromwell.cloudsupport.gcp.GoogleConfiguration import cromwell.cloudsupport.gcp.auth.GoogleAuthMode import io.circe.parser._ @@ -150,6 +152,13 @@ object Operations extends StrictLogging { .build() } + lazy val azureConfig: Config = CentaurConfig.conf.getConfig("azure") + val azureSubscription = azureConfig.getString("subscription") + val blobContainer = azureConfig.getString("container") + val azureEndpoint = azureConfig.getString("endpoint") + //NB: Centaur will throw an exception if it isn't able to authenticate with Azure blob storage via the local environment.
+ lazy val blobContainerClient: BlobContainerClient = AzureUtils.buildContainerClientFromLocalEnvironment(blobContainer, azureEndpoint, Option(azureSubscription)).get + def submitWorkflow(workflow: Workflow): Test[SubmittedWorkflow] = { new Test[SubmittedWorkflow] { override def run: IO[SubmittedWorkflow] = for { diff --git a/centaur/src/main/scala/centaur/test/workflow/DirectoryContentCountCheck.scala b/centaur/src/main/scala/centaur/test/workflow/DirectoryContentCountCheck.scala index c5813165aea..2bf90619dab 100644 --- a/centaur/src/main/scala/centaur/test/workflow/DirectoryContentCountCheck.scala +++ b/centaur/src/main/scala/centaur/test/workflow/DirectoryContentCountCheck.scala @@ -2,7 +2,7 @@ package centaur.test.workflow import cats.data.Validated._ import cats.syntax.all._ -import centaur.test.{AWSFilesChecker, FilesChecker, LocalFilesChecker, PipelinesFilesChecker} +import centaur.test.{AWSFilesChecker, FilesChecker, LocalFilesChecker, PipelinesFilesChecker, BlobFilesChecker} import com.typesafe.config.Config import common.validation.ErrorOr.ErrorOr import configs.Result @@ -25,8 +25,9 @@ object DirectoryContentCountCheck { case Result.Success("gcs") => valid(PipelinesFilesChecker) case Result.Success("local") => valid(LocalFilesChecker) case Result.Success("aws") => valid(AWSFilesChecker) - case Result.Success(_) => invalidNel(s"Test '$name': Invalid 'fileSystemCheck' value (must be either 'local', 'gcs' or 'aws'") - case Result.Failure(_) => invalidNel(s"Test '$name': Must specify a 'fileSystemCheck' value (must be either 'local', 'gcs' or 'aws'") + case Result.Success("blob") => valid(BlobFilesChecker) + case Result.Success(_) => invalidNel(s"Test '$name': Invalid 'fileSystemCheck' value (must be either 'local', 'gcs', 'blob', or 'aws')") + case Result.Failure(_) => invalidNel(s"Test '$name': Must specify a 'fileSystemCheck' value (must be either 'local', 'gcs', 'blob', or 'aws')") } (directoryContentCountsValidation, fileSystemChecker) mapN { (d, f) => Option(DirectoryContentCountCheck(d, f)) } diff --git a/centaur/test_cromwell.sh b/centaur/test_cromwell.sh index 2f330f1feb0..9161f2da5ad 100755 --- a/centaur/test_cromwell.sh +++ b/centaur/test_cromwell.sh @@ -37,7 +37,7 @@ Arguments: INITIAL_DIR=$(pwd) RUN_DIR=$(pwd) LOG_DIR="${RUN_DIR}"/logs -TEST_THREAD_COUNT=16 +TEST_THREAD_COUNT=16 # Note that most users of this script override this value CENTAUR_SBT_COVERAGE=false CROMWELL_TIMEOUT=10s SUITE="" diff --git a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsConfig.scala b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsConfig.scala index c8333a57a66..a2b0a385680 100644 --- a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsConfig.scala +++ b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsConfig.scala @@ -17,9 +17,9 @@ final case class DrsConfig(drsResolverUrl: String, object DrsConfig { // If you update these values also update Filesystems.md!
private val DefaultNumRetries = 3 - private val DefaultWaitInitial = 10 seconds - private val DefaultWaitMaximum = 30 seconds - private val DefaultWaitMultiplier = 1.5d + private val DefaultWaitInitial = 30 seconds + private val DefaultWaitMaximum = 60 seconds + private val DefaultWaitMultiplier = 1.25d private val DefaultWaitRandomizationFactor = 0.1 private val EnvDrsResolverUrl = "DRS_RESOLVER_URL" diff --git a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala index f9ae5b62e03..22d86c31726 100644 --- a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala +++ b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/DrsPathResolver.scala @@ -17,6 +17,7 @@ import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.http.client.methods.{HttpGet, HttpPost} import org.apache.http.entity.{ContentType, StringEntity} import org.apache.http.impl.client.HttpClientBuilder +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager import org.apache.http.util.EntityUtils import org.apache.http.{HttpResponse, HttpStatus, StatusLine} @@ -24,16 +25,16 @@ import java.nio.ByteBuffer import java.nio.channels.{Channels, ReadableByteChannel} import scala.util.Try -abstract class DrsPathResolver(drsConfig: DrsConfig, retryInternally: Boolean = true) { +abstract class DrsPathResolver(drsConfig: DrsConfig) { protected lazy val httpClientBuilder: HttpClientBuilder = { val clientBuilder = HttpClientBuilder.create() - if (retryInternally) { - val retryHandler = new DrsResolverHttpRequestRetryStrategy(drsConfig) - clientBuilder - .setRetryHandler(retryHandler) - .setServiceUnavailableRetryStrategy(retryHandler) - } + val retryHandler = new DrsResolverHttpRequestRetryStrategy(drsConfig) + clientBuilder + .setRetryHandler(retryHandler) + .setServiceUnavailableRetryStrategy(retryHandler) + clientBuilder.setConnectionManager(connectionManager) + clientBuilder.setConnectionManagerShared(true) clientBuilder } @@ -241,4 +242,13 @@ object DrsResolverResponseSupport { baseMessage + "(empty response)" } } + + lazy val connectionManager = { + val connManager = new PoolingHttpClientConnectionManager() + connManager.setMaxTotal(250) + // Since the HttpClient is always talking to DRSHub, + // make the max connections per route the same as max total connections + connManager.setDefaultMaxPerRoute(250) + connManager + } } diff --git a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/EngineDrsPathResolver.scala b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/EngineDrsPathResolver.scala index a62ce7971c2..01f7a488eb3 100644 --- a/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/EngineDrsPathResolver.scala +++ b/cloud-nio/cloud-nio-impl-drs/src/main/scala/cloud/nio/impl/drs/EngineDrsPathResolver.scala @@ -5,7 +5,7 @@ import common.validation.ErrorOr.ErrorOr case class EngineDrsPathResolver(drsConfig: DrsConfig, drsCredentials: DrsCredentials, ) - extends DrsPathResolver(drsConfig, retryInternally = false) { + extends DrsPathResolver(drsConfig) { override def getAccessToken: ErrorOr[String] = drsCredentials.getAccessToken } diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/AzureCredentials.scala b/cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureCredentials.scala similarity index 93% rename from 
filesystems/blob/src/main/scala/cromwell/filesystems/blob/AzureCredentials.scala rename to cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureCredentials.scala index ae84e39adbe..200b162c614 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/AzureCredentials.scala +++ b/cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureCredentials.scala @@ -1,4 +1,4 @@ -package cromwell.filesystems.blob +package cromwell.cloudsupport.azure import cats.implicits.catsSyntaxValidatedId import com.azure.core.credential.TokenRequestContext @@ -9,7 +9,6 @@ import common.validation.ErrorOr.ErrorOr import scala.concurrent.duration._ import scala.jdk.DurationConverters._ - import scala.util.{Failure, Success, Try} /** @@ -34,7 +33,7 @@ case object AzureCredentials { new DefaultAzureCredentialBuilder() .authorityHost(azureProfile.getEnvironment.getActiveDirectoryEndpoint) - def getAccessToken(identityClientId: Option[String]): ErrorOr[String] = { + def getAccessToken(identityClientId: Option[String] = None): ErrorOr[String] = { val credentials = identityClientId.foldLeft(defaultCredentialBuilder) { (builder, clientId) => builder.managedIdentityClientId(clientId) }.build() diff --git a/cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureUtils.scala b/cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureUtils.scala new file mode 100644 index 00000000000..09cf5f3869d --- /dev/null +++ b/cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureUtils.scala @@ -0,0 +1,65 @@ +package cromwell.cloudsupport.azure + +import com.azure.core.management.AzureEnvironment +import com.azure.core.management.profile.AzureProfile +import com.azure.identity.DefaultAzureCredentialBuilder +import com.azure.resourcemanager.AzureResourceManager +import com.azure.resourcemanager.storage.models.StorageAccountKey +import com.azure.storage.blob.{BlobContainerClient, BlobContainerClientBuilder} +import com.azure.storage.common.StorageSharedKeyCredential +import com.google.common.net.UrlEscapers + +import java.net.URI +import scala.jdk.CollectionConverters.IterableHasAsScala +import scala.util.{Failure, Success, Try} + +object AzureUtils { + /** + * Generates a BlobContainerClient that can interact with the specified container. Authenticates using the local azure client running on the same machine. + * @param blobContainer Name of the blob container. Looks something like "my-blob-container". + * @param azureEndpoint Azure endpoint of the container. Looks something like https://somedomain.blob.core.windows.net. + * @param subscription Azure subscription. A globally unique identifier. If not provided, a default subscription will be used. + * @return A blob container client capable of interacting with the specified container. 
+ */ + def buildContainerClientFromLocalEnvironment(blobContainer: String, azureEndpoint: String, subscription : Option[String]): Try[BlobContainerClient] = { + def parseURI(string: String): Try[URI] = Try(URI.create(UrlEscapers.urlFragmentEscaper().escape(string))) + def parseStorageAccount(uri: URI): Try[String] = uri.getHost.split("\\.").find(_.nonEmpty) + .map(Success(_)).getOrElse(Failure(new Exception("Could not parse storage account"))) + + val azureProfile = new AzureProfile(AzureEnvironment.AZURE) + + def azureCredentialBuilder = new DefaultAzureCredentialBuilder() + .authorityHost(azureProfile.getEnvironment.getActiveDirectoryEndpoint) + .build + + def authenticateWithSubscription(sub: String) = AzureResourceManager.authenticate(azureCredentialBuilder, azureProfile).withSubscription(sub) + + def authenticateWithDefaultSubscription = AzureResourceManager.authenticate(azureCredentialBuilder, azureProfile).withDefaultSubscription() + + def azure = subscription.map(authenticateWithSubscription(_)).getOrElse(authenticateWithDefaultSubscription) + + def findAzureStorageAccount(storageAccountName: String) = azure.storageAccounts.list.asScala.find(_.name.equals(storageAccountName)) + .map(Success(_)).getOrElse(Failure(new Exception("Azure Storage Account not found."))) + + def buildBlobContainerClient(credential: StorageSharedKeyCredential, endpointURL: String, blobContainerName: String): BlobContainerClient = { + new BlobContainerClientBuilder() + .credential(credential) + .endpoint(endpointURL) + .containerName(blobContainerName) + .buildClient() + } + + def generateBlobContainerClient: Try[BlobContainerClient] = for { + uri <- parseURI(azureEndpoint) + configuredAccount <- parseStorageAccount(uri) + azureAccount <- findAzureStorageAccount(configuredAccount) + keys = azureAccount.getKeys.asScala + key <- keys.headOption.fold[Try[StorageAccountKey]](Failure(new Exception("Storage account has no keys")))(Success(_)) + first = key.value + sskc = new StorageSharedKeyCredential(configuredAccount, first) + bcc = buildBlobContainerClient(sskc, azureEndpoint, blobContainer) + } yield bcc + + generateBlobContainerClient + } +} diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index 4ecdf1ce233..6ec05cf6025 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -15,6 +15,15 @@ webservice { } akka { + + http { + client { + parsing { + illegal-header-warnings = off + } + } + } + actor.default-dispatcher.fork-join-executor { # Number of threads = min(parallelism-factor * cpus, parallelism-max) # Below are the default values set by Akka, uncomment to tune these @@ -402,6 +411,15 @@ docker { max-retries = 3 // Supported registries (Docker Hub, Google, Quay) can have additional configuration set separately + azure { + // Worst case `ReadOps per minute` value from official docs + // https://github.com/MicrosoftDocs/azure-docs/blob/main/includes/container-registry-limits.md + throttle { + number-of-requests = 1000 + per = 60 seconds + } + num-threads = 10 + } google { // Example of how to configure throttling, available for all supported registries throttle { @@ -461,13 +479,21 @@ languages { } } "biscayne" { - # WDL biscayne is our in-progress name for what will (probably) become WDL 1.1 + # WDL biscayne is our in-progress name for what will become WDL 1.1 language-factory = "languages.wdl.biscayne.WdlBiscayneLanguageFactory" config { strict-validation: true enabled: true } } + "cascades" { + # WDL cascades is our 
in-progress name for what will (probably) become WDL 2.0 + language-factory = "languages.wdl.cascades.WdlCascadesLanguageFactory" + config { + strict-validation: true + enabled: true + } + } } } } @@ -600,3 +626,19 @@ ga4gh { contact-info-url = "https://cromwell.readthedocs.io/en/stable/" } } + +workflow-state-callback { + enabled: false + ## The number of threads to allocate for performing callbacks + # num-threads: 5 + # endpoint: "http://example.com/foo" # Can be overridden in workflow options + # auth.azure: true + ## Users can override default retry behavior if desired + # request-backoff { + # min: "3 seconds" + # max: "5 minutes" + # multiplier: 1.1 + # } + # max-retries = 10 + +} diff --git a/core/src/main/scala/cromwell/core/WorkflowOptions.scala b/core/src/main/scala/cromwell/core/WorkflowOptions.scala index 010300b2d8b..91a7c30bbfe 100644 --- a/core/src/main/scala/cromwell/core/WorkflowOptions.scala +++ b/core/src/main/scala/cromwell/core/WorkflowOptions.scala @@ -62,6 +62,7 @@ object WorkflowOptions { case object WorkflowFailureMode extends WorkflowOption("workflow_failure_mode") case object UseReferenceDisks extends WorkflowOption("use_reference_disks") case object MemoryRetryMultiplier extends WorkflowOption("memory_retry_multiplier") + case object WorkflowCallbackUri extends WorkflowOption("workflow_callback_uri") private lazy val WorkflowOptionsConf = ConfigFactory.load.getConfig("workflow-options") private lazy val EncryptedFields: Seq[String] = WorkflowOptionsConf.getStringList("encrypted-fields").asScala.toList diff --git a/core/src/main/scala/cromwell/core/path/NioPathMethods.scala b/core/src/main/scala/cromwell/core/path/NioPathMethods.scala index f5791e50c5a..42c17ca3222 100644 --- a/core/src/main/scala/cromwell/core/path/NioPathMethods.scala +++ b/core/src/main/scala/cromwell/core/path/NioPathMethods.scala @@ -2,7 +2,6 @@ package cromwell.core.path import java.nio.file.WatchEvent.{Kind, Modifier} import java.nio.file.{LinkOption, WatchKey, WatchService} - import scala.jdk.CollectionConverters._ /** @@ -68,4 +67,11 @@ trait NioPathMethods { final def startsWith(other: String): Boolean = nioPathPrivate.startsWith(other) final def toRealPath(options: LinkOption*): Path = newPath(nioPathPrivate.toRealPath(options: _*)) + + /** + * Get a valid path object that resolves symlinks if supported + * Default implementation assumes symlinks are supported, and that toRealPath may return a valid path. 
+ * This implementation may be overridden for NIO implementations that do not support symbolic links (For example the Azure NIO library) + */ + def getSymlinkSafePath(options: LinkOption*): Path = toRealPath(options: _*) } diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala index 1858f395024..3d99538f614 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala @@ -2,13 +2,13 @@ package drs.localizer import cats.data.NonEmptyList import cats.effect.{ExitCode, IO, IOApp} -import cats.implicits._ +import cats.implicits.toTraverseOps import cloud.nio.impl.drs.DrsPathResolver.{FatalRetryDisposition, RegularRetryDisposition} import cloud.nio.impl.drs._ import cloud.nio.spi.{CloudNioBackoff, CloudNioSimpleExponentialBackoff} import com.typesafe.scalalogging.StrictLogging import drs.localizer.CommandLineParser.AccessTokenStrategy.{Azure, Google} -import drs.localizer.downloaders.AccessUrlDownloader.Hashes +import drs.localizer.DrsLocalizerMain.{defaultNumRetries, toValidatedUriType} import drs.localizer.downloaders._ import org.apache.commons.csv.{CSVFormat, CSVParser} @@ -17,7 +17,10 @@ import java.nio.charset.Charset import scala.concurrent.duration._ import scala.jdk.CollectionConverters._ import scala.language.postfixOps +import drs.localizer.URIType.URIType +case class UnresolvedDrsUrl(drsUrl: String, downloadDestinationPath: String) +case class ResolvedDrsUrl(drsResponse: DrsResolverResponse, downloadDestinationPath: String, uriType: URIType) object DrsLocalizerMain extends IOApp with StrictLogging { override def run(args: List[String]): IO[ExitCode] = { @@ -38,15 +41,18 @@ object DrsLocalizerMain extends IOApp with StrictLogging { def buildParser(): scopt.OptionParser[CommandLineArguments] = new CommandLineParser() + // Default retry parameters for resolving a DRS url + val defaultNumRetries: Int = 5 val defaultBackoff: CloudNioBackoff = CloudNioSimpleExponentialBackoff( - initialInterval = 10 seconds, maxInterval = 60 seconds, multiplier = 2) + initialInterval = 1 seconds, maxInterval = 60 seconds, multiplier = 2) val defaultDownloaderFactory: DownloaderFactory = new DownloaderFactory { - override def buildAccessUrlDownloader(accessUrl: AccessUrl, downloadLoc: String, hashes: Hashes): IO[Downloader] = - IO.pure(AccessUrlDownloader(accessUrl, downloadLoc, hashes)) + override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): Downloader = + GcsUriDownloader(gcsPath, serviceAccountJsonOption, downloadLoc, requesterPaysProjectOption) - override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): IO[Downloader] = - IO.pure(GcsUriDownloader(gcsPath, serviceAccountJsonOption, downloadLoc, requesterPaysProjectOption)) + override def buildBulkAccessUrlDownloader(urlsToDownload: List[ResolvedDrsUrl]): Downloader = { + BulkAccessUrlDownloader(urlsToDownload) + } } private def printUsage: IO[ExitCode] = { @@ -54,35 +60,83 @@ object DrsLocalizerMain extends IOApp with StrictLogging { IO.pure(ExitCode.Error) } - def runLocalizer(commandLineArguments: CommandLineArguments, drsCredentials: DrsCredentials): IO[ExitCode] = { - commandLineArguments.manifestPath match { - case 
Some(manifestPath) => - val manifestFile = new File(manifestPath) - val csvParser = CSVParser.parse(manifestFile, Charset.defaultCharset(), CSVFormat.DEFAULT) - val exitCodes: IO[List[ExitCode]] = csvParser.asScala.map(record => { - val drsObject = record.get(0) - val containerPath = record.get(1) - localizeFile(commandLineArguments, drsCredentials, drsObject, containerPath) - }).toList.sequence - exitCodes.map(_.find(_ != ExitCode.Success).getOrElse(ExitCode.Success)) - case None => - val drsObject = commandLineArguments.drsObject.get - val containerPath = commandLineArguments.containerPath.get - localizeFile(commandLineArguments, drsCredentials, drsObject, containerPath) + /** + * Helper function to read a CSV file as pairs of drsURL -> local download destination. + * @param csvManifestPath Path to a CSV file where each row is something like: drs://asdf.ghj, path/to/my/directory + */ + def loadCSVManifest(csvManifestPath: String): IO[List[UnresolvedDrsUrl]] = { + IO { + val openFile = new File(csvManifestPath) + val csvParser = CSVParser.parse(openFile, Charset.defaultCharset(), CSVFormat.DEFAULT) + try{ + csvParser.getRecords.asScala.map(record => UnresolvedDrsUrl(record.get(0), record.get(1))).toList + } finally { + csvParser.close() + } } } - private def localizeFile(commandLineArguments: CommandLineArguments, drsCredentials: DrsCredentials, drsObject: String, containerPath: String) = { - new DrsLocalizerMain(drsObject, containerPath, drsCredentials, commandLineArguments.googleRequesterPaysProject). - resolveAndDownloadWithRetries(downloadRetries = 3, checksumRetries = 1, defaultDownloaderFactory, Option(defaultBackoff)).map(_.exitCode) + def runLocalizer(commandLineArguments: CommandLineArguments, drsCredentials: DrsCredentials) : IO[ExitCode] = { + val urlList = (commandLineArguments.manifestPath, commandLineArguments.drsObject, commandLineArguments.containerPath) match { + case (Some(manifestPath), _, _) => { + loadCSVManifest(manifestPath) + } + case (_, Some(drsObject), Some(containerPath)) => { + IO.pure(List(UnresolvedDrsUrl(drsObject, containerPath))) + } + case(_,_,_) => { + throw new RuntimeException("Illegal command line arguments supplied to drs localizer.") + } + } + val main = new DrsLocalizerMain(urlList, defaultDownloaderFactory, drsCredentials, commandLineArguments.googleRequesterPaysProject) + main.resolveAndDownload().map(_.exitCode) + } + + /** + * Helper function to decide which downloader to use based on data from the DRS response. + * Throws a runtime exception if the DRS response is invalid. 
+ */ + def toValidatedUriType(accessUrl: Option[AccessUrl], gsUri: Option[String]): URIType = { + // if both are provided, prefer using access urls + (accessUrl, gsUri) match { + case (Some(_), _) => + if(!accessUrl.get.url.startsWith("https://")) { throw new RuntimeException("Resolved Access URL does not start with https://")} + URIType.ACCESS + case (_, Some(_)) => + if(!gsUri.get.startsWith("gs://")) { throw new RuntimeException("Resolved Google URL does not start with gs://")} + URIType.GCS + case (_, _) => + throw new RuntimeException("DRS response did not contain any URLs") + } } + } + +object URIType extends Enumeration { + type URIType = Value + val GCS, ACCESS, UNKNOWN = Value } -class DrsLocalizerMain(drsUrl: String, - downloadLoc: String, +class DrsLocalizerMain(toResolveAndDownload: IO[List[UnresolvedDrsUrl]], + downloaderFactory: DownloaderFactory, drsCredentials: DrsCredentials, requesterPaysProjectIdOption: Option[String]) extends StrictLogging { + /** + * This will: + * - resolve all URLS + * - build downloader(s) for them + * - Invoke the downloaders to localize the files. + * @return DownloadSuccess if all downloads succeed. An error otherwise. + */ + def resolveAndDownload(): IO[DownloadResult] = { + val downloadResults = buildDownloaders().flatMap { downloaderList => + downloaderList.map(downloader => downloader.download).traverse(identity) + } + downloadResults.map{list => + list.find(result => result != DownloadSuccess).getOrElse(DownloadSuccess) + } + } + def getDrsPathResolver: IO[DrsLocalizerDrsPathResolver] = { IO { val drsConfig = DrsConfig.fromEnv(sys.env) @@ -91,76 +145,86 @@ class DrsLocalizerMain(drsUrl: String, } } - def resolveAndDownloadWithRetries(downloadRetries: Int, - checksumRetries: Int, - downloaderFactory: DownloaderFactory, - backoff: Option[CloudNioBackoff], - downloadAttempt: Int = 0, - checksumAttempt: Int = 0): IO[DownloadResult] = { - - def maybeRetryForChecksumFailure(t: Throwable): IO[DownloadResult] = { - if (checksumAttempt < checksumRetries) { - backoff foreach { b => Thread.sleep(b.backoffMillis) } - logger.warn(s"Attempting retry $checksumAttempt of $checksumRetries checksum retries to download $drsUrl", t) - // In the event of a checksum failure reset the download attempt to zero. - resolveAndDownloadWithRetries(downloadRetries, checksumRetries, downloaderFactory, backoff map { _.next }, 0, checksumAttempt + 1) - } else { - IO.raiseError(new RuntimeException(s"Exhausted $checksumRetries checksum retries to resolve, download and checksum $drsUrl", t)) - } + /** + * After resolving all of the URLs, this sorts them into an "Access" or "GCS" bucket. + * All access URLS will be downloaded as a batch with a single bulk downloader. + * All google URLs will be downloaded individually in their own google downloader. + * @return List of all downloaders required to fulfill the request. 
+ */ + def buildDownloaders() : IO[List[Downloader]] = { + resolveUrls(toResolveAndDownload).map { pendingDownloads => + val accessUrls = pendingDownloads.filter(url => url.uriType == URIType.ACCESS) + val googleUrls = pendingDownloads.filter(url => url.uriType == URIType.GCS) + val bulkDownloader: List[Downloader] = if (accessUrls.isEmpty) List() else List(buildBulkAccessUrlDownloader(accessUrls)) + val googleDownloaders: List[Downloader] = if (googleUrls.isEmpty) List() else buildGoogleDownloaders(googleUrls) + bulkDownloader ++ googleDownloaders } + } - def maybeRetryForDownloadFailure(t: Throwable): IO[DownloadResult] = { - t match { - case _: FatalRetryDisposition => - IO.raiseError(t) - case _ if downloadAttempt < downloadRetries => - backoff foreach { b => Thread.sleep(b.backoffMillis) } - logger.warn(s"Attempting retry $downloadAttempt of $downloadRetries download retries to download $drsUrl", t) - resolveAndDownloadWithRetries(downloadRetries, checksumRetries, downloaderFactory, backoff map { _.next }, downloadAttempt + 1, checksumAttempt) - case _ => - IO.raiseError(new RuntimeException(s"Exhausted $downloadRetries download retries to resolve, download and checksum $drsUrl", t)) - } + def buildGoogleDownloaders(resolvedGoogleUrls: List[ResolvedDrsUrl]) : List[Downloader] = { + resolvedGoogleUrls.map{url=> + downloaderFactory.buildGcsUriDownloader( + gcsPath = url.drsResponse.gsUri.get, + serviceAccountJsonOption = url.drsResponse.googleServiceAccount.map(_.data.spaces2), + downloadLoc = url.downloadDestinationPath, + requesterPaysProjectOption = requesterPaysProjectIdOption) } + } + def buildBulkAccessUrlDownloader(resolvedUrls: List[ResolvedDrsUrl]) : Downloader = { + downloaderFactory.buildBulkAccessUrlDownloader(resolvedUrls) + } - resolveAndDownload(downloaderFactory).redeemWith({ - maybeRetryForDownloadFailure - }, - { - case f: FatalDownloadFailure => - IO.raiseError(new RuntimeException(s"Fatal error downloading DRS object: $f")) - case r: RetryableDownloadFailure => - maybeRetryForDownloadFailure( - new RuntimeException(s"Retryable download error: $r for $drsUrl on retry attempt $downloadAttempt of $downloadRetries") with RegularRetryDisposition) - case ChecksumFailure => - maybeRetryForChecksumFailure(new RuntimeException(s"Checksum failure for $drsUrl on checksum retry attempt $checksumAttempt of $checksumRetries")) - case o => IO.pure(o) - }) + /** + * Runs a synchronous HTTP request to resolve the provided DRS URL with the provided resolver. + */ + def resolveSingleUrl(resolverObject: DrsLocalizerDrsPathResolver, drsUrlToResolve: UnresolvedDrsUrl): IO[ResolvedDrsUrl] = { + val fields = NonEmptyList.of(DrsResolverField.GsUri, DrsResolverField.GoogleServiceAccount, DrsResolverField.AccessUrl, DrsResolverField.Hashes) + val drsResponse = resolverObject.resolveDrs(drsUrlToResolve.drsUrl, fields) + drsResponse.map(resp => ResolvedDrsUrl(resp, drsUrlToResolve.downloadDestinationPath, toValidatedUriType(resp.accessUrl, resp.gsUri))) } - private [localizer] def resolveAndDownload(downloaderFactory: DownloaderFactory): IO[DownloadResult] = { - resolve(downloaderFactory) flatMap { _.download } + + val defaultBackoff: CloudNioBackoff = CloudNioSimpleExponentialBackoff( + initialInterval = 10 seconds, maxInterval = 60 seconds, multiplier = 2) + + /** + * Runs synchronous HTTP requests to resolve all the DRS urls. 
+ */ + def resolveUrls(unresolvedUrls: IO[List[UnresolvedDrsUrl]]): IO[List[ResolvedDrsUrl]] = { + unresolvedUrls.flatMap { unresolvedList => + getDrsPathResolver.flatMap { resolver => + unresolvedList.map { unresolvedUrl => + resolveWithRetries(resolver, unresolvedUrl, defaultNumRetries, Option(defaultBackoff)) + }.traverse(identity) + } + } } - private [localizer] def resolve(downloaderFactory: DownloaderFactory): IO[Downloader] = { - val fields = NonEmptyList.of(DrsResolverField.GsUri, DrsResolverField.GoogleServiceAccount, DrsResolverField.AccessUrl, DrsResolverField.Hashes) - for { - resolver <- getDrsPathResolver - drsResolverResponse <- resolver.resolveDrs(drsUrl, fields) - - // Currently DRS Resolver only supports resolving DRS paths to access URLs or GCS paths. - downloader <- (drsResolverResponse.accessUrl, drsResolverResponse.gsUri) match { - case (Some(accessUrl), _) => - downloaderFactory.buildAccessUrlDownloader(accessUrl, downloadLoc, drsResolverResponse.hashes) - case (_, Some(gcsPath)) => - val serviceAccountJsonOption = drsResolverResponse.googleServiceAccount.map(_.data.spaces2) - downloaderFactory.buildGcsUriDownloader( - gcsPath = gcsPath, - serviceAccountJsonOption = serviceAccountJsonOption, - downloadLoc = downloadLoc, - requesterPaysProjectOption = requesterPaysProjectIdOption) - case _ => - IO.raiseError(new RuntimeException(DrsPathResolver.ExtractUriErrorMsg)) + def resolveWithRetries(resolverObject: DrsLocalizerDrsPathResolver, + drsUrlToResolve: UnresolvedDrsUrl, + resolutionRetries: Int, + backoff: Option[CloudNioBackoff], + resolutionAttempt: Int = 0) : IO[ResolvedDrsUrl] = { + + def maybeRetryForResolutionFailure(t: Throwable): IO[ResolvedDrsUrl] = { + if (resolutionAttempt < resolutionRetries) { + backoff foreach { b => Thread.sleep(b.backoffMillis) } + logger.warn(s"Attempting retry $resolutionAttempt of $resolutionRetries drs resolution retries to resolve ${drsUrlToResolve.drsUrl}", t) + resolveWithRetries(resolverObject, drsUrlToResolve, resolutionRetries, backoff map { _.next }, resolutionAttempt+1) + } else { + IO.raiseError(new RuntimeException(s"Exhausted $resolutionRetries resolution retries to resolve ${drsUrlToResolve.drsUrl}", t)) } - } yield downloader + } + + resolveSingleUrl(resolverObject, drsUrlToResolve).redeemWith( + recover = maybeRetryForResolutionFailure, + bind = { + case f: FatalRetryDisposition => + IO.raiseError(new RuntimeException(s"Fatal error resolving DRS URL: $f")) + case _: RegularRetryDisposition => + resolveWithRetries(resolverObject, drsUrlToResolve, resolutionRetries, backoff, resolutionAttempt+1) + case o => IO.pure(o) + }) } } + diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/AccessUrlDownloader.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/AccessUrlDownloader.scala deleted file mode 100644 index ae6f2fa4f1e..00000000000 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/AccessUrlDownloader.scala +++ /dev/null @@ -1,91 +0,0 @@ -package drs.localizer.downloaders - -import cats.data.Validated.{Invalid, Valid} -import cats.effect.{ExitCode, IO} -import cloud.nio.impl.drs.AccessUrl -import com.typesafe.scalalogging.StrictLogging -import common.exception.AggregatedMessageException -import common.util.StringUtil._ -import common.validation.ErrorOr.ErrorOr -import drs.localizer.downloaders.AccessUrlDownloader._ - -import scala.sys.process.{Process, ProcessLogger} -import scala.util.matching.Regex - -case class GetmResult(returnCode: Int, stderr: String) -
-case class AccessUrlDownloader(accessUrl: AccessUrl, downloadLoc: String, hashes: Hashes) extends Downloader with StrictLogging { - def generateDownloadScript: ErrorOr[String] = { - val signedUrl = accessUrl.url - GetmChecksum(hashes, accessUrl).args map { checksumArgs => - s"""mkdir -p $$(dirname '$downloadLoc') && rm -f '$downloadLoc' && getm $checksumArgs --filepath '$downloadLoc' '$signedUrl'""" - } - } - - def runGetm: IO[GetmResult] = { - generateDownloadScript match { - case Invalid(errors) => - IO.raiseError(AggregatedMessageException("Error generating access URL download script", errors.toList)) - case Valid(script) => IO { - val copyCommand = Seq("bash", "-c", script) - val copyProcess = Process(copyCommand) - - val stderr = new StringBuilder() - val errorCapture: String => Unit = { s => stderr.append(s); () } - - // As of `getm` version 0.0.4 the contents of stdout do not appear to be interesting (only a progress bar - // with no option to suppress it), so ignore stdout for now. If stdout becomes interesting in future versions - // of `getm` it can be captured just like stderr is being captured here. - val returnCode = copyProcess ! ProcessLogger(_ => (), errorCapture) - - GetmResult(returnCode, stderr.toString().trim()) - } - } - } - - override def download: IO[DownloadResult] = { - // We don't want to log the unmasked signed URL here. On a PAPI backend this log will end up under the user's - // workspace bucket, but that bucket may have visibility different than the data referenced by the signed URL. - val masked = accessUrl.url.maskSensitiveUri - logger.info(s"Attempting to download data to '$downloadLoc' from access URL '$masked'.") - - runGetm map toDownloadResult - } - - def toDownloadResult(getmResult: GetmResult): DownloadResult = { - getmResult match { - case GetmResult(0, stderr) if stderr.isEmpty => - DownloadSuccess - case GetmResult(0, stderr) => - stderr match { - case ChecksumFailureMessage() => - ChecksumFailure - case _ => - UnrecognizedRetryableDownloadFailure(ExitCode(0)) - } - case GetmResult(rc, stderr) => - stderr match { - case HttpStatusMessage(status) => - Integer.parseInt(status) match { - case 408 | 429 => - RecognizedRetryableDownloadFailure(ExitCode(rc)) - case s if s / 100 == 4 => - FatalDownloadFailure(ExitCode(rc)) - case s if s / 100 == 5 => - RecognizedRetryableDownloadFailure(ExitCode(rc)) - case _ => - UnrecognizedRetryableDownloadFailure(ExitCode(rc)) - } - case _ => - UnrecognizedRetryableDownloadFailure(ExitCode(rc)) - } - } - } -} - -object AccessUrlDownloader { - type Hashes = Option[Map[String, String]] - - val ChecksumFailureMessage: Regex = raw""".*AssertionError: Checksum failed!.*""".r - val HttpStatusMessage: Regex = raw"""ERROR:getm\.cli.*"status_code":\s*(\d+).*""".r -} diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/BulkAccessUrlDownloader.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/BulkAccessUrlDownloader.scala new file mode 100644 index 00000000000..4668c5072ed --- /dev/null +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/BulkAccessUrlDownloader.scala @@ -0,0 +1,144 @@ +package drs.localizer.downloaders + +import cats.effect.{ExitCode, IO} +import cloud.nio.impl.drs.{AccessUrl, DrsResolverResponse} +import com.typesafe.scalalogging.StrictLogging + +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Path, Paths} +import scala.sys.process.{Process, ProcessLogger} +import scala.util.matching.Regex +import 
drs.localizer.ResolvedDrsUrl +case class GetmResult(returnCode: Int, stderr: String) +/** + * Getm is a python tool that is used to download resolved DRS uris quickly and in parallel. + * This class builds a getm-manifest.json file that it uses for input, and builds/executes a shell command + * to invoke the Getm tool, which is expected to already be installed in the local environment. + * @param resolvedUrls + */ +case class BulkAccessUrlDownloader(resolvedUrls : List[ResolvedDrsUrl]) extends Downloader with StrictLogging { + + val getmManifestPath: Path = Paths.get("getm-manifest.json") + /** + * Write a json manifest to disk that looks like: + * // [ + * // { + * // "url" : "www.123.com", + * // "filepath" : "path/to/where/123/should/be/downloaded", + * // "checksum" : "sdfjndsfjkfsdjsdfkjsdf", + * // "checksum-algorithm" : "md5" + * // }, + * // { + * // "url" : "www.567.com" + * // "filepath" : "path/to/where/567/should/be/downloaded", + * // "checksum" : "asdasdasfsdfsdfasdsdfasd", + * // "checksum-algorithm" : "md5" + * // } + * // ] + * + * @param resolvedUrls + * @return Filepath of a getm-manifest.json that Getm can use to download multiple files in parallel. + */ + def generateJsonManifest(resolvedUrls : List[ResolvedDrsUrl]): IO[Path] = { + def toJsonString(drsResponse: DrsResolverResponse, destinationFilepath: String): String = { + //NB: trailing comma is being removed in generateJsonManifest + val accessUrl: AccessUrl = drsResponse.accessUrl.getOrElse(AccessUrl("missing", None)) + drsResponse.hashes.map(_ => { + val checksum = GetmChecksum(drsResponse.hashes, accessUrl).value.getOrElse("error_calculating_checksum") + val checksumAlgorithm = GetmChecksum(drsResponse.hashes, accessUrl).getmAlgorithm + s""" { + | "url" : "${accessUrl.url}", + | "filepath" : "$destinationFilepath", + | "checksum" : "$checksum", + | "checksum-algorithm" : "$checksumAlgorithm" + | }, + |""".stripMargin + }).getOrElse( + s""" { + | "url" : "${accessUrl.url}", + | "filepath" : "$destinationFilepath" + | }, + |""".stripMargin + ) + } + IO { + var jsonString: String = "[\n" + for (resolvedUrl <- resolvedUrls) { + jsonString += toJsonString(resolvedUrl.drsResponse, resolvedUrl.downloadDestinationPath) + } + if(jsonString.contains(',')) { + //remove trailing comma from array elements, but don't crash on empty list. + jsonString = jsonString.substring(0, jsonString.lastIndexOf(",")) + } + jsonString += "\n]" + Files.write(getmManifestPath, jsonString.getBytes(StandardCharsets.UTF_8)) + } + } + + def deleteJsonManifest() = { + Files.deleteIfExists(getmManifestPath) + } + + def generateGetmCommand(pathToMainfestJson : Path) : String = { + s"""getm --manifest ${pathToMainfestJson.toString}""" + } + def runGetm: IO[GetmResult] = { + generateJsonManifest(resolvedUrls).flatMap{ manifestPath => + val script = generateGetmCommand(manifestPath) + val copyCommand : Seq[String] = Seq("bash", "-c", script) + logger.info(script) + val copyProcess = Process(copyCommand) + val stderr = new StringBuilder() + val errorCapture: String => Unit = { s => stderr.append(s); () } + val returnCode = copyProcess ! ProcessLogger(_ => (), errorCapture) + deleteJsonManifest() + logger.info(stderr.toString().trim()) + IO(GetmResult(returnCode, stderr.toString().trim())) + } + } + + override def download: IO[DownloadResult] = { + // We don't want to log the unmasked signed URL here. 
On a PAPI backend this log will end up under the user's + // workspace bucket, but that bucket may have visibility different than the data referenced by the signed URL. + logger.info(s"Attempting to download data") + + runGetm map toDownloadResult + } + + def toDownloadResult(getmResult: GetmResult): DownloadResult = { + getmResult match { + case GetmResult(0, stderr) if stderr.isEmpty => + DownloadSuccess + case GetmResult(0, stderr) => + stderr match { + case BulkAccessUrlDownloader.ChecksumFailureMessage() => + ChecksumFailure + case _ => + UnrecognizedRetryableDownloadFailure(ExitCode(0)) + } + case GetmResult(rc, stderr) => + stderr match { + case BulkAccessUrlDownloader.HttpStatusMessage(status) => + Integer.parseInt(status) match { + case 408 | 429 => + RecognizedRetryableDownloadFailure(ExitCode(rc)) + case s if s / 100 == 4 => + FatalDownloadFailure(ExitCode(rc)) + case s if s / 100 == 5 => + RecognizedRetryableDownloadFailure(ExitCode(rc)) + case _ => + UnrecognizedRetryableDownloadFailure(ExitCode(rc)) + } + case _ => + UnrecognizedRetryableDownloadFailure(ExitCode(rc)) + } + } + } +} + +object BulkAccessUrlDownloader{ + type Hashes = Option[Map[String, String]] + + val ChecksumFailureMessage: Regex = raw""".*AssertionError: Checksum failed!.*""".r + val HttpStatusMessage: Regex = raw"""ERROR:getm\.cli.*"status_code":\s*(\d+).*""".r +} diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/DownloaderFactory.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/DownloaderFactory.scala index 8465ede0dd6..6c7f27e8a6e 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/DownloaderFactory.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/DownloaderFactory.scala @@ -1,14 +1,12 @@ package drs.localizer.downloaders -import cats.effect.IO -import cloud.nio.impl.drs.AccessUrl -import drs.localizer.downloaders.AccessUrlDownloader.Hashes +import drs.localizer.ResolvedDrsUrl trait DownloaderFactory { - def buildAccessUrlDownloader(accessUrl: AccessUrl, downloadLoc: String, hashes: Hashes): IO[Downloader] + def buildBulkAccessUrlDownloader(urlsToDownload: List[ResolvedDrsUrl]) : Downloader def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, - requesterPaysProjectOption: Option[String]): IO[Downloader] + requesterPaysProjectOption: Option[String]): Downloader } diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GcsUriDownloader.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GcsUriDownloader.scala index d4c81af6300..8991e79f5fd 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GcsUriDownloader.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GcsUriDownloader.scala @@ -1,10 +1,12 @@ package drs.localizer.downloaders import cats.effect.{ExitCode, IO} +import cloud.nio.spi.{CloudNioBackoff, CloudNioSimpleExponentialBackoff} import com.typesafe.scalalogging.StrictLogging import drs.localizer.downloaders.GcsUriDownloader.RequesterPaysErrorMsg - +import scala.language.postfixOps import java.nio.charset.StandardCharsets import java.nio.file.{Files, Path} +import scala.concurrent.duration.DurationInt import scala.sys.process.{Process, ProcessLogger} case class GcsUriDownloader(gcsUrl: String, @@ -12,7 +14,15 @@ case class GcsUriDownloader(gcsUrl: String, downloadLoc: String, requesterPaysProjectIdOption: Option[String]) extends Downloader with StrictLogging 
{ + val defaultNumRetries: Int = 5 + val defaultBackoff: CloudNioBackoff = CloudNioSimpleExponentialBackoff( + initialInterval = 1 seconds, maxInterval = 60 seconds, multiplier = 2) + override def download: IO[DownloadResult] = { + downloadWithRetries(defaultNumRetries, Option(defaultBackoff)) + } + + def runDownloadCommand: IO[DownloadResult] = { logger.info(s"Requester Pays project ID is $requesterPaysProjectIdOption") logger.info(s"Attempting to download $gcsUrl to $downloadLoc") @@ -40,6 +50,37 @@ case class GcsUriDownloader(gcsUrl: String, IO.pure(result) } + def downloadWithRetries(downloadRetries: Int, + backoff: Option[CloudNioBackoff], + downloadAttempt: Int = 0): IO[DownloadResult] = + { + + def maybeRetryForDownloadFailure(t: Throwable): IO[DownloadResult] = { + if (downloadAttempt < downloadRetries) { + backoff foreach { b => Thread.sleep(b.backoffMillis) } + logger.warn(s"Attempting download retry $downloadAttempt of $downloadRetries for a GCS url", t) + downloadWithRetries(downloadRetries, backoff map { + _.next + }, downloadAttempt + 1) + } else { + IO.raiseError(new RuntimeException(s"Exhausted $downloadRetries resolution retries to download GCS file", t)) + } + } + + runDownloadCommand.redeemWith( + recover = maybeRetryForDownloadFailure, + bind = { + case s: DownloadSuccess.type => + IO.pure(s) + case _: RecognizedRetryableDownloadFailure => + downloadWithRetries(downloadRetries, backoff, downloadAttempt+1) + case _: UnrecognizedRetryableDownloadFailure => + downloadWithRetries(downloadRetries, backoff, downloadAttempt+1) + case _ => + downloadWithRetries(downloadRetries, backoff, downloadAttempt+1) + }) + } + /** * Bash to download the GCS file using `gsutil`. */ diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GetmChecksum.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GetmChecksum.scala index 2a39a6543a3..2ca1bd3d2e3 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GetmChecksum.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GetmChecksum.scala @@ -3,7 +3,7 @@ package drs.localizer.downloaders import cats.syntax.validated._ import cloud.nio.impl.drs.AccessUrl import common.validation.ErrorOr.ErrorOr -import drs.localizer.downloaders.AccessUrlDownloader.Hashes +import drs.localizer.downloaders.BulkAccessUrlDownloader.Hashes import org.apache.commons.codec.binary.Base64.encodeBase64String import org.apache.commons.codec.binary.Hex.decodeHex import org.apache.commons.text.StringEscapeUtils diff --git a/cromwell-drs-localizer/src/test/scala/drs/localizer/DrsLocalizerMainSpec.scala b/cromwell-drs-localizer/src/test/scala/drs/localizer/DrsLocalizerMainSpec.scala index 66799fcc099..52fa4c99330 100644 --- a/cromwell-drs-localizer/src/test/scala/drs/localizer/DrsLocalizerMainSpec.scala +++ b/cromwell-drs-localizer/src/test/scala/drs/localizer/DrsLocalizerMainSpec.scala @@ -3,12 +3,11 @@ package drs.localizer import cats.data.NonEmptyList import cats.effect.{ExitCode, IO} import cats.syntax.validated._ -import cloud.nio.impl.drs.DrsPathResolver.FatalRetryDisposition +import drs.localizer.MockDrsPaths.{fakeAccessUrls, fakeDrsUrlWithGcsResolutionOnly, fakeGoogleUrls} import cloud.nio.impl.drs.{AccessUrl, DrsConfig, DrsCredentials, DrsResolverField, DrsResolverResponse} import common.assertion.CromwellTimeoutSpec import common.validation.ErrorOr.ErrorOr import drs.localizer.MockDrsLocalizerDrsPathResolver.{FakeAccessTokenStrategy, FakeHashes} -import 
drs.localizer.downloaders.AccessUrlDownloader.Hashes import drs.localizer.downloaders._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers @@ -19,6 +18,28 @@ class DrsLocalizerMainSpec extends AnyFlatSpec with CromwellTimeoutSpec with Mat val fakeDownloadLocation = "/root/foo/foo-123.bam" val fakeRequesterPaysId = "fake-billing-project" + val fakeGoogleInput : IO[List[UnresolvedDrsUrl]] = IO(List( + UnresolvedDrsUrl(fakeDrsUrlWithGcsResolutionOnly, "/path/to/nowhere") + )) + + val fakeAccessInput: IO[List[UnresolvedDrsUrl]] = IO(List( + UnresolvedDrsUrl("https://my-fake-access-url.com", "/path/to/somewhereelse") + )) + + val fakeBulkGoogleInput: IO[List[UnresolvedDrsUrl]] = IO(List( + UnresolvedDrsUrl("drs://my-fake-google-url.com", "/path/to/nowhere"), + UnresolvedDrsUrl("drs://my-fake-google-url.com2", "/path/to/nowhere2"), + UnresolvedDrsUrl("drs://my-fake-google-url.com3", "/path/to/nowhere3"), + UnresolvedDrsUrl("drs://my-fake-google-url.com4", "/path/to/nowhere4") + )) + + val fakeBulkAccessInput: IO[List[UnresolvedDrsUrl]] = IO(List( + UnresolvedDrsUrl("drs://my-fake-access-url.com", "/path/to/somewhereelse"), + UnresolvedDrsUrl("drs://my-fake-access-url2.com", "/path/to/somewhereelse2"), + UnresolvedDrsUrl("drs://my-fake-access-url3.com", "/path/to/somewhereelse3"), + UnresolvedDrsUrl("drs://my-fake-access-url4.com", "/path/to/somewhereelse4") + )) + behavior of "DrsLocalizerMain" it should "fail if drs input is not passed" in { @@ -29,264 +50,192 @@ class DrsLocalizerMainSpec extends AnyFlatSpec with CromwellTimeoutSpec with Mat DrsLocalizerMain.run(List(MockDrsPaths.fakeDrsUrlWithGcsResolutionOnly)).unsafeRunSync() shouldBe ExitCode.Error } - it should "accept arguments and run successfully without Requester Pays ID" in { - val mockDrsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithGcsResolutionOnly, fakeDownloadLocation, None) - val expected = GcsUriDownloader( - gcsUrl = "gs://abc/foo-123/abc123", - serviceAccountJson = None, - downloadLoc = fakeDownloadLocation, - requesterPaysProjectIdOption = None) - mockDrsLocalizer.resolve(DrsLocalizerMain.defaultDownloaderFactory).unsafeRunSync() shouldBe expected - } - - it should "run successfully with all 3 arguments" in { - val mockDrsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithGcsResolutionOnly, fakeDownloadLocation, Option(fakeRequesterPaysId)) - val expected = GcsUriDownloader( - gcsUrl = "gs://abc/foo-123/abc123", - serviceAccountJson = None, - downloadLoc = fakeDownloadLocation, - requesterPaysProjectIdOption = Option(fakeRequesterPaysId)) - mockDrsLocalizer.resolve(DrsLocalizerMain.defaultDownloaderFactory).unsafeRunSync() shouldBe expected - } - - it should "fail and throw error if the DRS Resolver response does not have gs:// url" in { - val mockDrsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithoutAnyResolution, fakeDownloadLocation, None) - - the[RuntimeException] thrownBy { - mockDrsLocalizer.resolve(DrsLocalizerMain.defaultDownloaderFactory).unsafeRunSync() - } should have message "No access URL nor GCS URI starting with 'gs://' found in the DRS Resolver response!" 
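The fixtures above feed the contract that the rewritten tests pin down for buildDownloaders: each GCS URI gets its own GcsUriDownloader, while all access URLs share exactly one BulkAccessUrlDownloader, and no bulk downloader is built when there are no access URLs. The real implementation lives in DrsLocalizerMain and is outside this hunk; the sketch below only illustrates that contract, and the ResolvedDrsUrl field names uriType and downloadDestinationPath are assumptions.

// Illustrative only; uses the DownloaderFactory trait as changed in this PR.
def buildDownloadersSketch(resolved: List[ResolvedDrsUrl], factory: DownloaderFactory): List[Downloader] = {
  val (accessUrls, gcsUrls) = resolved.partition(_.uriType == URIType.ACCESS) // field name assumed
  val gcsDownloaders = gcsUrls.map { r =>
    factory.buildGcsUriDownloader(
      gcsPath = r.drsResponse.gsUri.getOrElse(""),
      serviceAccountJsonOption = None,
      downloadLoc = r.downloadDestinationPath, // field name assumed
      requesterPaysProjectOption = None)
  }
  // All access URLs share a single bulk downloader; none is built when the list is empty.
  val bulkDownloaders = if (accessUrls.isEmpty) Nil else List(factory.buildBulkAccessUrlDownloader(accessUrls))
  bulkDownloaders ++ gcsDownloaders
}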
- } - - it should "resolve to use the correct downloader for an access url" in { - val mockDrsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithAccessUrlResolutionOnly, fakeDownloadLocation, None) - val expected = AccessUrlDownloader( - accessUrl = AccessUrl(url = "http://abc/def/ghi.bam", headers = None), - downloadLoc = fakeDownloadLocation, - hashes = FakeHashes - ) - mockDrsLocalizer.resolve(DrsLocalizerMain.defaultDownloaderFactory).unsafeRunSync() shouldBe expected - } - - it should "resolve to use the correct downloader for an access url when the DRS Resolver response also contains a gs url" in { - val mockDrsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithAccessUrlAndGcsResolution, fakeDownloadLocation, None) - val expected = AccessUrlDownloader( - accessUrl = AccessUrl(url = "http://abc/def/ghi.bam", headers = None), downloadLoc = fakeDownloadLocation, - hashes = FakeHashes - ) - mockDrsLocalizer.resolve(DrsLocalizerMain.defaultDownloaderFactory).unsafeRunSync() shouldBe expected - } - - it should "not retry on access URL download success" in { - var actualAttempts = 0 + it should "tolerate no URLs being provided" in { + val mockDownloadFactory = new DownloaderFactory { + override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): Downloader = { + // This test path should never ask for the Google downloader + throw new RuntimeException("test failure111") + } - val drsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithAccessUrlResolutionOnly, fakeDownloadLocation, None) { - override def resolveAndDownload(downloaderFactory: DownloaderFactory): IO[DownloadResult] = { - actualAttempts = actualAttempts + 1 - super.resolveAndDownload(downloaderFactory) + override def buildBulkAccessUrlDownloader(urlsToDownload: List[ResolvedDrsUrl]): Downloader = { + // This test path should never ask for the Bulk downloader + throw new RuntimeException("test failure111") } } - val accessUrlDownloader = IO.pure(new Downloader { - override def download: IO[DownloadResult] = - IO.pure(DownloadSuccess) - }) + val mockdrsLocalizer = new MockDrsLocalizerMain(IO(List()), mockDownloadFactory, FakeAccessTokenStrategy, Option(fakeRequesterPaysId)) + val downloaders: List[Downloader] = mockdrsLocalizer.buildDownloaders().unsafeRunSync() + downloaders.length shouldBe 0 + } - val downloaderFactory = new DownloaderFactory { - override def buildAccessUrlDownloader(accessUrl: AccessUrl, downloadLoc: String, hashes: Hashes): IO[Downloader] = { - accessUrlDownloader + it should "build correct downloader(s) for a single google URL" in { + val mockDownloadFactory = new DownloaderFactory { + override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): Downloader = { + GcsUriDownloader(gcsPath, serviceAccountJsonOption, downloadLoc, requesterPaysProjectOption) } - override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): IO[Downloader] = { - // This test path should never ask for the GCS downloader - throw new RuntimeException("test failure") + override def buildBulkAccessUrlDownloader(urlsToDownload: List[ResolvedDrsUrl]): Downloader = { + // This test path should never ask for the Bulk downloader + throw new RuntimeException("test failure111") } } - 
drsLocalizer.resolveAndDownloadWithRetries( - downloadRetries = 3, - checksumRetries = 1, - downloaderFactory = downloaderFactory, - backoff = None - ).unsafeRunSync() shouldBe DownloadSuccess + val mockdrsLocalizer = new MockDrsLocalizerMain(IO(List(fakeGoogleUrls.head._1)), mockDownloadFactory,FakeAccessTokenStrategy, Option(fakeRequesterPaysId)) + val downloaders: List[Downloader] = mockdrsLocalizer.buildDownloaders().unsafeRunSync() + downloaders.length shouldBe 1 - actualAttempts shouldBe 1 - } - - it should "retry an appropriate number of times for regular retryable access URL download failures" in { - var actualAttempts = 0 - - val drsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithAccessUrlResolutionOnly, fakeDownloadLocation, None) { - override def resolveAndDownload(downloaderFactory: DownloaderFactory): IO[DownloadResult] = { - actualAttempts = actualAttempts + 1 - super.resolveAndDownload(downloaderFactory) - } + val correct = downloaders.head match { + case _: GcsUriDownloader => true + case _ => false } - val accessUrlDownloader = IO.pure(new Downloader { - override def download: IO[DownloadResult] = - IO.pure(RecognizedRetryableDownloadFailure(exitCode = ExitCode(0))) - }) - - val downloaderFactory = new DownloaderFactory { - override def buildAccessUrlDownloader(accessUrl: AccessUrl, downloadLoc: String, hashes: Hashes): IO[Downloader] = { - accessUrlDownloader - } + correct shouldBe true + } - override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): IO[Downloader] = { + it should "build correct downloader(s) for a single access URL" in { + val mockDownloadFactory = new DownloaderFactory { + override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): Downloader = { // This test path should never ask for the GCS downloader throw new RuntimeException("test failure") } - } - - assertThrows[Throwable] { - drsLocalizer.resolveAndDownloadWithRetries( - downloadRetries = 3, - checksumRetries = 1, - downloaderFactory = downloaderFactory, - backoff = None - ).unsafeRunSync() - } - - actualAttempts shouldBe 4 // 1 initial attempt + 3 retries = 4 total attempts - } - it should "retry an appropriate number of times for fatal retryable access URL download failures" in { - var actualAttempts = 0 - - val drsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithAccessUrlResolutionOnly, fakeDownloadLocation, None) { - override def resolveAndDownload(downloaderFactory: DownloaderFactory): IO[DownloadResult] = { - actualAttempts = actualAttempts + 1 - IO.raiseError(new RuntimeException("testing: fatal error") with FatalRetryDisposition) + override def buildBulkAccessUrlDownloader(urlsToDownload: List[ResolvedDrsUrl]): Downloader = { + BulkAccessUrlDownloader(urlsToDownload) } } - val accessUrlDownloader = IO.pure(new Downloader { - override def download: IO[DownloadResult] = - IO.pure(RecognizedRetryableDownloadFailure(exitCode = ExitCode(0))) - }) + val mockdrsLocalizer = new MockDrsLocalizerMain(IO(List(fakeAccessUrls.head._1)), mockDownloadFactory, FakeAccessTokenStrategy, Option(fakeRequesterPaysId)) + val downloaders: List[Downloader] = mockdrsLocalizer.buildDownloaders().unsafeRunSync() + downloaders.length shouldBe 1 + + val expected = BulkAccessUrlDownloader( + List(fakeAccessUrls.head._2) + ) + expected shouldEqual downloaders.head + } - val downloaderFactory 
= new DownloaderFactory { - override def buildAccessUrlDownloader(accessUrl: AccessUrl, downloadLoc: String, hashes: Hashes): IO[Downloader] = { - accessUrlDownloader + it should "build correct downloader(s) for multiple google URLs" in { + val mockDownloadFactory = new DownloaderFactory { + override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): Downloader = { + GcsUriDownloader(gcsPath, serviceAccountJsonOption, downloadLoc, requesterPaysProjectOption) } - override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): IO[Downloader] = { + override def buildBulkAccessUrlDownloader(urlsToDownload: List[ResolvedDrsUrl]): Downloader = { // This test path should never ask for the GCS downloader throw new RuntimeException("test failure") } } - - assertThrows[Throwable] { - drsLocalizer.resolveAndDownloadWithRetries( - downloadRetries = 3, - checksumRetries = 1, - downloaderFactory = downloaderFactory, - backoff = None - ).unsafeRunSync() - } - - actualAttempts shouldBe 1 // 1 and done with a fatal exception - } - - it should "not retry on GCS URI download success" in { - var actualAttempts = 0 - val drsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithGcsResolutionOnly, fakeDownloadLocation, None) { - override def resolveAndDownload(downloaderFactory: DownloaderFactory): IO[DownloadResult] = { - actualAttempts = actualAttempts + 1 - super.resolveAndDownload(downloaderFactory) - } - } - val gcsUriDownloader = IO.pure(new Downloader { - override def download: IO[DownloadResult] = - IO.pure(DownloadSuccess) + val unresolvedUrls : List[UnresolvedDrsUrl] = fakeGoogleUrls.map(pair => pair._1).toList + val mockdrsLocalizer = new MockDrsLocalizerMain(IO(unresolvedUrls), mockDownloadFactory, FakeAccessTokenStrategy, Option(fakeRequesterPaysId)) + val downloaders: List[Downloader] = mockdrsLocalizer.buildDownloaders().unsafeRunSync() + downloaders.length shouldBe unresolvedUrls.length + + val countGoogleDownloaders = downloaders.count(downloader => downloader match { + case _: GcsUriDownloader => true + case _ => false }) + // We expect one GCS downloader for each GCS uri provided + countGoogleDownloaders shouldBe downloaders.length + } - val downloaderFactory = new DownloaderFactory { - override def buildAccessUrlDownloader(accessUrl: AccessUrl, downloadLoc: String, hashes: Hashes): IO[Downloader] = { - // This test path should never ask for the access URL downloader + it should "build a single bulk downloader for multiple access URLs" in { + val mockDownloadFactory = new DownloaderFactory { + override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): Downloader = { + // This test path should never ask for the GCS downloader throw new RuntimeException("test failure") } - override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): IO[Downloader] = { - gcsUriDownloader + override def buildBulkAccessUrlDownloader(urlsToDownload: List[ResolvedDrsUrl]): Downloader = { + BulkAccessUrlDownloader(urlsToDownload) } } - - drsLocalizer.resolveAndDownloadWithRetries( - downloadRetries = 3, - checksumRetries = 1, - downloaderFactory = downloaderFactory, - backoff = None).unsafeRunSync() - - actualAttempts 
shouldBe 1 + val unresolvedUrls: List[UnresolvedDrsUrl] = fakeAccessUrls.map(pair => pair._1).toList + val mockdrsLocalizer = new MockDrsLocalizerMain(IO(unresolvedUrls), mockDownloadFactory, FakeAccessTokenStrategy, Option(fakeRequesterPaysId)) + val downloaders: List[Downloader] = mockdrsLocalizer.buildDownloaders().unsafeRunSync() + downloaders.length shouldBe 1 + + val countBulkDownloaders = downloaders.count(downloader => downloader match { + case _: BulkAccessUrlDownloader => true + case _ => false + }) + // We expect one total Bulk downloader for all access URIs to share + countBulkDownloaders shouldBe 1 + val expected = BulkAccessUrlDownloader( + fakeAccessUrls.map(pair => pair._2).toList + ) + expected shouldEqual downloaders.head } - it should "retry an appropriate number of times for retryable GCS URI download failures" in { - var actualAttempts = 0 - val drsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithGcsResolutionOnly, fakeDownloadLocation, None) { - override def resolveAndDownload(downloaderFactory: DownloaderFactory): IO[DownloadResult] = { - actualAttempts = actualAttempts + 1 - super.resolveAndDownload(downloaderFactory) - } - } - val gcsUriDownloader = IO.pure(new Downloader { - override def download: IO[DownloadResult] = - IO.pure(RecognizedRetryableDownloadFailure(exitCode = ExitCode(1))) - }) + it should "build 1 bulk downloader and 5 google downloaders for a mix of URLs" in { + val unresolvedUrls: List[UnresolvedDrsUrl] = fakeAccessUrls.map(pair => pair._1).toList ++ fakeGoogleUrls.map(pair => pair._1).toList + val mockdrsLocalizer = new MockDrsLocalizerMain(IO(unresolvedUrls), DrsLocalizerMain.defaultDownloaderFactory, FakeAccessTokenStrategy, Option(fakeRequesterPaysId)) + val downloaders: List[Downloader] = mockdrsLocalizer.buildDownloaders().unsafeRunSync() - val downloaderFactory = new DownloaderFactory { - override def buildAccessUrlDownloader(accessUrl: AccessUrl, downloadLoc: String, hashes: Hashes): IO[Downloader] = { - // This test path should never ask for the access URL downloader - throw new RuntimeException("test failure") - } + downloaders.length shouldBe 6 - override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): IO[Downloader] = { - gcsUriDownloader - } - } + // We expect a single bulk downloader despite 5 access URLs being provided + val countBulkDownloaders = downloaders.count(downloader => downloader match { + case _: BulkAccessUrlDownloader => true + case _ => false + }) + countBulkDownloaders shouldBe 1 + // We expect one GCS downloader for each GCS URI provided + val countGoogleDownloaders = downloaders.count(downloader => downloader match { + case _: GcsUriDownloader => true + case _ => false + }) + countGoogleDownloaders shouldBe 5 + } - assertThrows[Throwable] { - drsLocalizer.resolveAndDownloadWithRetries( - downloadRetries = 3, - checksumRetries = 1, - downloaderFactory = downloaderFactory, - backoff = None).unsafeRunSync() - } + it should "accept arguments and run successfully without Requester Pays ID" in { + val unresolved = fakeGoogleUrls.head._1 + val mockDrsLocalizer = new MockDrsLocalizerMain(IO(List(unresolved)), DrsLocalizerMain.defaultDownloaderFactory, FakeAccessTokenStrategy, None) + val expected = GcsUriDownloader( + gcsUrl = fakeGoogleUrls.get(unresolved).get.drsResponse.gsUri.get, + serviceAccountJson = None, +
downloadLoc = unresolved.downloadDestinationPath, + requesterPaysProjectIdOption = None) + val downloader: Downloader = mockDrsLocalizer.buildDownloaders().unsafeRunSync().head + downloader shouldBe expected + } - actualAttempts shouldBe 4 // 1 initial attempt + 3 retries = 4 total attempts + it should "run successfully with all 3 arguments" in { + val unresolved = fakeGoogleUrls.head._1 + val mockDrsLocalizer = new MockDrsLocalizerMain(IO(List(unresolved)), DrsLocalizerMain.defaultDownloaderFactory, FakeAccessTokenStrategy, Option(fakeRequesterPaysId)) + val expected = GcsUriDownloader( + gcsUrl = fakeGoogleUrls.get(unresolved).get.drsResponse.gsUri.get, + serviceAccountJson = None, + downloadLoc = unresolved.downloadDestinationPath, + requesterPaysProjectIdOption = Option(fakeRequesterPaysId)) + val downloader: Downloader = mockDrsLocalizer.buildDownloaders().unsafeRunSync().head + downloader shouldBe expected } - it should "retry an appropriate number of times for checksum failures" in { - var actualAttempts = 0 - val drsLocalizer = new MockDrsLocalizerMain(MockDrsPaths.fakeDrsUrlWithAccessUrlResolutionOnly, fakeDownloadLocation, None) { - override def resolveAndDownload(downloaderFactory: DownloaderFactory): IO[DownloadResult] = { - actualAttempts = actualAttempts + 1 - super.resolveAndDownload(downloaderFactory) - } - } - val accessUrlDownloader = IO.pure(new Downloader { - override def download: IO[DownloadResult] = - IO.pure(ChecksumFailure) - }) + it should "successfully identify uri types, preferring access" in { + val exampleAccessResponse = DrsResolverResponse(accessUrl = Option(AccessUrl("https://something.com", FakeHashes))) + val exampleGoogleResponse = DrsResolverResponse(gsUri = Option("gs://something")) + val exampleMixedResponse = DrsResolverResponse(accessUrl = Option(AccessUrl("https://something.com", FakeHashes)), gsUri = Option("gs://something")) + DrsLocalizerMain.toValidatedUriType(exampleAccessResponse.accessUrl, exampleAccessResponse.gsUri) shouldBe URIType.ACCESS + DrsLocalizerMain.toValidatedUriType(exampleGoogleResponse.accessUrl, exampleGoogleResponse.gsUri) shouldBe URIType.GCS + DrsLocalizerMain.toValidatedUriType(exampleMixedResponse.accessUrl, exampleMixedResponse.gsUri) shouldBe URIType.ACCESS + } - val downloaderFactory = new DownloaderFactory { - override def buildAccessUrlDownloader(accessUrl: AccessUrl, downloadLoc: String, hashes: Hashes): IO[Downloader] = { - accessUrlDownloader - } + it should "throw an exception if the DRS Resolver response is invalid" in { + val badAccessResponse = DrsResolverResponse(accessUrl = Option(AccessUrl("hQQps://something.com", FakeHashes))) + val badGoogleResponse = DrsResolverResponse(gsUri = Option("gQQs://something")) + val emptyResponse = DrsResolverResponse() - override def buildGcsUriDownloader(gcsPath: String, serviceAccountJsonOption: Option[String], downloadLoc: String, requesterPaysProjectOption: Option[String]): IO[Downloader] = { - // This test path should never ask for the GCS URI downloader. 
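These assertions, together with the invalid-response cases that follow, exercise DrsLocalizerMain.toValidatedUriType, whose implementation is not part of this diff. A sketch consistent with the spec (access URLs are preferred over gs:// URIs; malformed or empty responses are rejected with the asserted messages), illustrative only:

// Illustrative sketch; the shipped method lives in DrsLocalizerMain and may differ in structure.
def toValidatedUriTypeSketch(accessUrl: Option[AccessUrl], gsUri: Option[String]) =
  (accessUrl, gsUri) match {
    case (Some(access), _) if access.url.startsWith("https://") => URIType.ACCESS
    case (Some(_), _) => throw new RuntimeException("Resolved Access URL does not start with https://")
    case (None, Some(gs)) if gs.startsWith("gs://") => URIType.GCS
    case (None, Some(_)) => throw new RuntimeException("Resolved Google URL does not start with gs://")
    case (None, None) => throw new RuntimeException("DRS response did not contain any URLs")
  }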
- throw new RuntimeException("test failure") - } - } + the[RuntimeException] thrownBy { + DrsLocalizerMain.toValidatedUriType(badAccessResponse.accessUrl, badAccessResponse.gsUri) + } should have message "Resolved Access URL does not start with https://" - assertThrows[Throwable] { - drsLocalizer.resolveAndDownloadWithRetries( - downloadRetries = 3, - checksumRetries = 1, - downloaderFactory = downloaderFactory, - backoff = None).unsafeRunSync() - } + the[RuntimeException] thrownBy { + DrsLocalizerMain.toValidatedUriType(badGoogleResponse.accessUrl, badGoogleResponse.gsUri) + } should have message "Resolved Google URL does not start with gs://" - actualAttempts shouldBe 2 // 1 initial attempt + 1 retry = 2 total attempts + the[RuntimeException] thrownBy { + DrsLocalizerMain.toValidatedUriType(emptyResponse.accessUrl, emptyResponse.gsUri) + } should have message "DRS response did not contain any URLs" } } @@ -295,27 +244,53 @@ object MockDrsPaths { val fakeDrsUrlWithAccessUrlResolutionOnly = "drs://def/bar-456/def456" val fakeDrsUrlWithAccessUrlAndGcsResolution = "drs://ghi/baz-789/ghi789" val fakeDrsUrlWithoutAnyResolution = "drs://foo/bar/no-gcs-path" + + val fakeGoogleUrls: Map[UnresolvedDrsUrl, ResolvedDrsUrl] = Map( + (UnresolvedDrsUrl("drs://abc/foo-123/google/0", "/path/to/google/local0"), ResolvedDrsUrl(DrsResolverResponse(gsUri = Option("gs://some/uri0")), "/path/to/google/local0", URIType.GCS)), + (UnresolvedDrsUrl("drs://abc/foo-123/google/1", "/path/to/google/local1"), ResolvedDrsUrl(DrsResolverResponse(gsUri = Option("gs://some/uri1")), "/path/to/google/local1", URIType.GCS)), + (UnresolvedDrsUrl("drs://abc/foo-123/google/2", "/path/to/google/local2"), ResolvedDrsUrl(DrsResolverResponse(gsUri = Option("gs://some/uri2")), "/path/to/google/local2", URIType.GCS)), + (UnresolvedDrsUrl("drs://abc/foo-123/google/3", "/path/to/google/local3"), ResolvedDrsUrl(DrsResolverResponse(gsUri = Option("gs://some/uri3")), "/path/to/google/local3", URIType.GCS)), + (UnresolvedDrsUrl("drs://abc/foo-123/google/4", "/path/to/google/local4"), ResolvedDrsUrl(DrsResolverResponse(gsUri = Option("gs://some/uri4")), "/path/to/google/local4", URIType.GCS)) + ) + + val fakeAccessUrls: Map[UnresolvedDrsUrl, ResolvedDrsUrl] = Map( + (UnresolvedDrsUrl("drs://abc/foo-123/access/0", "/path/to/access/local0"), ResolvedDrsUrl(DrsResolverResponse(accessUrl = Option(AccessUrl("https://abc/foo-123/access/0", FakeHashes))), "/path/to/access/local0", URIType.ACCESS)), + (UnresolvedDrsUrl("drs://abc/foo-123/access/1", "/path/to/access/local1"), ResolvedDrsUrl(DrsResolverResponse(accessUrl = Option(AccessUrl("https://abc/foo-123/access/1", FakeHashes))), "/path/to/access/local1", URIType.ACCESS)), + (UnresolvedDrsUrl("drs://abc/foo-123/access/2", "/path/to/access/local2"), ResolvedDrsUrl(DrsResolverResponse(accessUrl = Option(AccessUrl("https://abc/foo-123/access/2", FakeHashes))), "/path/to/access/local2", URIType.ACCESS)), + (UnresolvedDrsUrl("drs://abc/foo-123/access/3", "/path/to/access/local3"), ResolvedDrsUrl(DrsResolverResponse(accessUrl = Option(AccessUrl("https://abc/foo-123/access/3", FakeHashes))), "/path/to/access/local3", URIType.ACCESS)), + (UnresolvedDrsUrl("drs://abc/foo-123/access/4", "/path/to/access/local4"), ResolvedDrsUrl(DrsResolverResponse(accessUrl = Option(AccessUrl("https://abc/foo-123/access/4", FakeHashes))), "/path/to/access/local4", URIType.ACCESS)) + ) } -class MockDrsLocalizerMain(drsUrl: String, - downloadLoc: String, - requesterPaysProjectIdOption: Option[String], +class 
MockDrsLocalizerMain(toResolveAndDownload: IO[List[UnresolvedDrsUrl]], + downloaderFactory: DownloaderFactory, + drsCredentials: DrsCredentials, + requesterPaysProjectIdOption: Option[String] ) - extends DrsLocalizerMain(drsUrl, downloadLoc, FakeAccessTokenStrategy, requesterPaysProjectIdOption) { + + extends DrsLocalizerMain(toResolveAndDownload, downloaderFactory, FakeAccessTokenStrategy, requesterPaysProjectIdOption) { override def getDrsPathResolver: IO[DrsLocalizerDrsPathResolver] = { IO { new MockDrsLocalizerDrsPathResolver(cloud.nio.impl.drs.MockDrsPaths.mockDrsConfig) } } + override def resolveSingleUrl(resolverObject: DrsLocalizerDrsPathResolver, drsUrlToResolve: UnresolvedDrsUrl): IO[ResolvedDrsUrl] = { + IO { + if (!fakeAccessUrls.contains(drsUrlToResolve) && !fakeGoogleUrls.contains(drsUrlToResolve)) { + throw new RuntimeException("Unexpected URI during testing") + } + fakeAccessUrls.getOrElse(drsUrlToResolve, fakeGoogleUrls.getOrElse(drsUrlToResolve, ResolvedDrsUrl(DrsResolverResponse(),"/12/3/", URIType.UNKNOWN))) + } + } } - class MockDrsLocalizerDrsPathResolver(drsConfig: DrsConfig) extends DrsLocalizerDrsPathResolver(drsConfig, FakeAccessTokenStrategy) { override def resolveDrs(drsPath: String, fields: NonEmptyList[DrsResolverField.Value]): IO[DrsResolverResponse] = { + val drsResolverResponse = DrsResolverResponse( size = Option(1234), hashes = FakeHashes diff --git a/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/AccessUrlDownloaderSpec.scala b/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/AccessUrlDownloaderSpec.scala deleted file mode 100644 index df7512dd81a..00000000000 --- a/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/AccessUrlDownloaderSpec.scala +++ /dev/null @@ -1,59 +0,0 @@ -package drs.localizer.downloaders - -import cats.effect.ExitCode -import cats.syntax.validated._ -import cloud.nio.impl.drs.AccessUrl -import common.assertion.CromwellTimeoutSpec -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers -import org.scalatest.prop.TableDrivenPropertyChecks._ - -class AccessUrlDownloaderSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers { - it should "return the correct download script for a url-only access URL, no requester pays" in { - val fakeDownloadLocation = "/root/foo/foo-123.bam" - val fakeAccessUrl = "http://abc/def/ghi.bam" - - val downloader = AccessUrlDownloader( - accessUrl = AccessUrl(url = fakeAccessUrl, headers = None), - downloadLoc = fakeDownloadLocation, - hashes = None - ) - - val expected = s"""mkdir -p $$(dirname '$fakeDownloadLocation') && rm -f '$fakeDownloadLocation' && getm --checksum-algorithm 'null' --checksum null --filepath '$fakeDownloadLocation' '$fakeAccessUrl'""".validNel - - downloader.generateDownloadScript shouldBe expected - } - - { - val results = Table( - ("exitCode", "stderr", "download result"), - (0, "", DownloadSuccess), - // In `getm` version 0.0.4 checksum failures currently exit 0. - (0, "oh me oh my: AssertionError: Checksum failed!!!", ChecksumFailure), - // Unrecognized because of non-zero exit code without an HTTP status, despite what looks like a checksum failure. - (1, "oh me oh my: AssertionError: Checksum failed!!!", UnrecognizedRetryableDownloadFailure(ExitCode(1))), - // Unrecognized because of zero exit status with stderr that does not look like a checksum failure. - (0, "what the", UnrecognizedRetryableDownloadFailure(ExitCode(0))), - // Unrecognized because of non-zero exit code without an HTTP status. 
- (1, " foobar ", UnrecognizedRetryableDownloadFailure(ExitCode(1))), - // Unrecognized because of zero exit status with stderr that does not look like a checksum failure. - (0, """ERROR:getm.cli possibly some words "status_code": 503 words""", UnrecognizedRetryableDownloadFailure(ExitCode(0))), - // Recognized because of non-zero exit status and an HTTP status. - (1, """ERROR:getm.cli possibly some words "status_code": 503 words""", RecognizedRetryableDownloadFailure(ExitCode(1))), - // Recognized because of non-zero exit status and an HTTP status. - (1, """ERROR:getm.cli possibly some words "status_code": 408 more words""", RecognizedRetryableDownloadFailure(ExitCode(1))), - // Recognized and non-retryable because of non-zero exit status and 404 HTTP status. - (1, """ERROR:getm.cli possibly some words "status_code": 404 even more words""", FatalDownloadFailure(ExitCode(1))), - // Unrecognized because of zero exit status and 404 HTTP status. - (0, """ERROR:getm.cli possibly some words "status_code": 404 even more words""", UnrecognizedRetryableDownloadFailure(ExitCode(0))), - ) - - val accessUrlDownloader = AccessUrlDownloader(null, null, null) - - forAll(results) { (exitCode, stderr, expected) => - it should s"produce $expected for exitCode $exitCode and stderr '$stderr'" in { - accessUrlDownloader.toDownloadResult(GetmResult(exitCode, stderr)) shouldBe expected - } - } - } -} diff --git a/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/BulkAccessUrlDownloaderSpec.scala b/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/BulkAccessUrlDownloaderSpec.scala new file mode 100644 index 00000000000..7b96ece8d0a --- /dev/null +++ b/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/BulkAccessUrlDownloaderSpec.scala @@ -0,0 +1,114 @@ +package drs.localizer.downloaders + +import cats.effect.{ExitCode, IO} +import cloud.nio.impl.drs.{AccessUrl, DrsResolverResponse} +import common.assertion.CromwellTimeoutSpec +import org.scalatest.prop.TableDrivenPropertyChecks._ +import drs.localizer.{ResolvedDrsUrl, URIType} + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import java.nio.file.Path + +class BulkAccessUrlDownloaderSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers { + val ex1 = ResolvedDrsUrl(DrsResolverResponse(accessUrl = Option(AccessUrl("https://my.fake/url123", None))), "path/to/local/download/dest", URIType.ACCESS) + val ex2 = ResolvedDrsUrl(DrsResolverResponse(accessUrl = Option(AccessUrl("https://my.fake/url1234", None))), "path/to/local/download/dest2", URIType.ACCESS) + val ex3 = ResolvedDrsUrl(DrsResolverResponse(accessUrl = Option(AccessUrl("https://my.fake/url1235", None))), "path/to/local/download/dest3", URIType.ACCESS) + val emptyList : List[ResolvedDrsUrl] = List() + val oneElement: List[ResolvedDrsUrl] = List(ex1) + val threeElements: List[ResolvedDrsUrl] = List(ex1, ex2, ex3) + + it should "correctly parse a collection of Access Urls into a manifest.json" in { + val expected: String = + s"""|[ + | { + | "url" : "https://my.fake/url123", + | "filepath" : "path/to/local/download/dest" + | }, + | { + | "url" : "https://my.fake/url1234", + | "filepath" : "path/to/local/download/dest2" + | }, + | { + | "url" : "https://my.fake/url1235", + | "filepath" : "path/to/local/download/dest3" + | } + |]""".stripMargin + + val downloader = BulkAccessUrlDownloader(threeElements) + + val filepath: IO[Path] = downloader.generateJsonManifest(threeElements) + val source = 
scala.io.Source.fromFile(filepath.unsafeRunSync().toString) + val lines = try source.mkString finally source.close() + lines shouldBe expected + } + + it should "properly construct empty JSON array from empty list." in { + val expected: String = + s"""|[ + | + |]""".stripMargin + + val downloader = BulkAccessUrlDownloader(emptyList) + val filepath: IO[Path] = downloader.generateJsonManifest(emptyList) + val source = scala.io.Source.fromFile(filepath.unsafeRunSync().toString) + val lines = try source.mkString finally source.close() + lines shouldBe expected + } + + it should "properly construct JSON array from single element list." in { + val expected: String = + s"""|[ + | { + | "url" : "https://my.fake/url123", + | "filepath" : "path/to/local/download/dest" + | } + |]""".stripMargin + + val downloader = BulkAccessUrlDownloader(oneElement) + val filepath: IO[Path] = downloader.generateJsonManifest(oneElement) + val source = scala.io.Source.fromFile(filepath.unsafeRunSync().toString) + val lines = try source.mkString finally source.close() + lines shouldBe expected + } + + it should "properly construct the invocation command" in { + val downloader = BulkAccessUrlDownloader(oneElement) + val filepath: Path = downloader.generateJsonManifest(threeElements).unsafeRunSync() + val expected = s"""getm --manifest ${filepath.toString}""" + downloader.generateGetmCommand(filepath) shouldBe expected + } + + { + val results = Table( + ("exitCode", "stderr", "download result"), + (0, "", DownloadSuccess), + // In `getm` version 0.0.4 checksum failures currently exit 0. + (0, "oh me oh my: AssertionError: Checksum failed!!!", ChecksumFailure), + // Unrecognized because of non-zero exit code without an HTTP status, despite what looks like a checksum failure. + (1, "oh me oh my: AssertionError: Checksum failed!!!", UnrecognizedRetryableDownloadFailure(ExitCode(1))), + // Unrecognized because of zero exit status with stderr that does not look like a checksum failure. + (0, "what the", UnrecognizedRetryableDownloadFailure(ExitCode(0))), + // Unrecognized because of non-zero exit code without an HTTP status. + (1, " foobar ", UnrecognizedRetryableDownloadFailure(ExitCode(1))), + // Unrecognized because of zero exit status with stderr that does not look like a checksum failure. + (0, """ERROR:getm.cli possibly some words "status_code": 503 words""", UnrecognizedRetryableDownloadFailure(ExitCode(0))), + // Recognized because of non-zero exit status and an HTTP status. + (1, """ERROR:getm.cli possibly some words "status_code": 503 words""", RecognizedRetryableDownloadFailure(ExitCode(1))), + // Recognized because of non-zero exit status and an HTTP status. + (1, """ERROR:getm.cli possibly some words "status_code": 408 more words""", RecognizedRetryableDownloadFailure(ExitCode(1))), + // Recognized and non-retryable because of non-zero exit status and 404 HTTP status. + (1, """ERROR:getm.cli possibly some words "status_code": 404 even more words""", FatalDownloadFailure(ExitCode(1))), + // Unrecognized because of zero exit status and 404 HTTP status. 
+ (0, """ERROR:getm.cli possibly some words "status_code": 404 even more words""", UnrecognizedRetryableDownloadFailure(ExitCode(0))), + ) + val bulkDownloader = BulkAccessUrlDownloader(null) + + forAll(results) { (exitCode, stderr, expected) => + it should s"produce $expected for exitCode $exitCode and stderr '$stderr'" in { + bulkDownloader.toDownloadResult(GetmResult(exitCode, stderr)) shouldBe expected + } + } + } +} diff --git a/cromwell.example.backends/GCPBATCH.conf b/cromwell.example.backends/GCPBATCH.conf new file mode 100644 index 00000000000..ba554e3322d --- /dev/null +++ b/cromwell.example.backends/GCPBATCH.conf @@ -0,0 +1,104 @@ +# This is an example of how you can use the Google Cloud Batch backend +# provider. *This is not a complete configuration file!* The +# content here should be copy pasted into the backend -> providers section +# of cromwell.example.backends/cromwell.examples.conf in the root of the repository. +# You should uncomment lines that you want to define, and read carefully to customize +# the file. + +# Documentation +# https://cromwell.readthedocs.io/en/stable/backends/Google/ + +backend { + default = GCPBATCH + + providers { + GCPBATCH { + actor-factory = "cromwell.backend.google.batch.GcpBatchBackendLifecycleActorFactory" + config { + # Google project + project = "my-cromwell-workflows" + + # Base bucket for workflow executions + root = "gs://my-cromwell-workflows-bucket" + + # Polling for completion backs-off gradually for slower-running jobs. + # This is the maximum polling interval (in seconds): + maximum-polling-interval = 600 + + # Optional Dockerhub Credentials. Can be used to access private docker images. + dockerhub { + # account = "" + # token = "" + } + + # Optional configuration to use high security network (Virtual Private Cloud) for running jobs. + # See https://cromwell.readthedocs.io/en/stable/backends/Google/ for more details. + # virtual-private-cloud { + # network-label-key = "network-key" + # auth = "application-default" + # } + + # Global pipeline timeout + # Defaults to 7 days; max 30 days + # batch-timeout = 7 days + + genomics { + # A reference to an auth defined in the `google` stanza at the top. This auth is used to create + # Batch Jobs and manipulate auth JSONs. + auth = "application-default" + + + // alternative service account to use on the launched compute instance + // NOTE: If combined with service account authorization, both that service account and this service account + // must be able to read and write to the 'root' GCS path + compute-service-account = "default" + + # Location to submit jobs to Batch and store job metadata. + location = "us-central1" + + # Specifies the minimum file size for `gsutil cp` to use parallel composite uploads during delocalization. + # Parallel composite uploads can result in a significant improvement in delocalization speed for large files + # but may introduce complexities in downloading such files from GCS, please see + # https://cloud.google.com/storage/docs/gsutil/commands/cp#parallel-composite-uploads for more information. + # + # If set to 0 parallel composite uploads are turned off. The default Cromwell configuration turns off + # parallel composite uploads, this sample configuration turns it on for files of 150M or larger. + parallel-composite-upload-threshold="150M" + } + + filesystems { + gcs { + # A reference to a potentially different auth for manipulating files via engine functions. 
+ auth = "application-default" + # Google project which will be billed for the requests + project = "google-billing-project" + + caching { + # When a cache hit is found, the following duplication strategy will be followed to use the cached outputs + # Possible values: "copy", "reference". Defaults to "copy" + # "copy": Copy the output files + # "reference": DO NOT copy the output files but point to the original output files instead. + # Will still make sure than all the original output files exist and are accessible before + # going forward with the cache hit. + duplication-strategy = "copy" + } + } + } + + default-runtime-attributes { + cpu: 1 + failOnStderr: false + continueOnReturnCode: 0 + memory: "2048 MB" + bootDiskSizeGb: 10 + # Allowed to be a String, or a list of Strings + disks: "local-disk 10 SSD" + noAddress: false + preemptible: 0 + zones: ["us-central1-a", "us-central1-b"] + } + + } + } + } +} diff --git a/cromwell.example.backends/TES.conf b/cromwell.example.backends/TES.conf index a0bfb7cb141..509cd0d5d90 100644 --- a/cromwell.example.backends/TES.conf +++ b/cromwell.example.backends/TES.conf @@ -28,6 +28,22 @@ backend { disk: "2 GB" preemptible: false } + + # Backoff behavior for task status polling and execution retries are configurable, with defaults + # shown below. All four fields must be set for each backoff if overriding. + # + # poll-backoff { + # min: "10 seconds" + # max: "5 minutes" + # multiplier: 1.1 + # randomization-factor: 0.5 + # } + # execute-or-recover-backoff { + # min: "3 seconds" + # max: "30 seconds" + # multiplier: 1.1 + # randomization-factor: 0.5 + # } } } } diff --git a/database/migration/src/main/resources/metadata_changesets/set_table_role.xml b/database/migration/src/main/resources/metadata_changesets/set_table_role.xml new file mode 100644 index 00000000000..48a56bb6091 --- /dev/null +++ b/database/migration/src/main/resources/metadata_changesets/set_table_role.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + SELECT count(1) + FROM pg_roles + where '${sharedCromwellDbRole}' != '' and pg_roles.rolname = '${sharedCromwellDbRole}'; + + + + ALTER TABLE "CUSTOM_LABEL_ENTRY" OWNER TO ${sharedCromwellDbRole}; + ALTER TABLE "METADATA_ENTRY" OWNER TO ${sharedCromwellDbRole}; + ALTER TABLE "SUMMARY_QUEUE_ENTRY" OWNER TO ${sharedCromwellDbRole}; + ALTER TABLE "SUMMARY_STATUS_ENTRY" OWNER TO ${sharedCromwellDbRole}; + ALTER TABLE "WORKFLOW_METADATA_SUMMARY_ENTRY" OWNER TO ${sharedCromwellDbRole}; + ALTER TABLE "sqlmetadatadatabasechangelog" OWNER TO ${sharedCromwellDbRole}; + ALTER TABLE "sqlmetadatadatabasechangeloglock" OWNER TO ${sharedCromwellDbRole}; + + + + diff --git a/database/migration/src/main/resources/sql_metadata_changelog.xml b/database/migration/src/main/resources/sql_metadata_changelog.xml index 1c5b0837a89..0989ec2199a 100644 --- a/database/migration/src/main/resources/sql_metadata_changelog.xml +++ b/database/migration/src/main/resources/sql_metadata_changelog.xml @@ -19,5 +19,12 @@ + + + diff --git a/database/sql/src/main/scala/cromwell/database/slick/MetadataSlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/MetadataSlickDatabase.scala index 1405f7d101f..eb87f88d101 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/MetadataSlickDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/MetadataSlickDatabase.scala @@ -1,7 +1,6 @@ package cromwell.database.slick import java.sql.Timestamp - import cats.syntax.functor._ import cats.instances.future._ import com.typesafe.config.{Config, 
ConfigFactory} @@ -10,6 +9,7 @@ import cromwell.database.sql.MetadataSqlDatabase import cromwell.database.sql.SqlConverters._ import cromwell.database.sql.joins.{CallOrWorkflowQuery, CallQuery, MetadataJobQueryValue, WorkflowQuery} import cromwell.database.sql.tables.{CustomLabelEntry, InformationSchemaEntry, MetadataEntry, WorkflowMetadataSummaryEntry} +import net.ceedubs.ficus.Ficus._ import slick.basic.DatabasePublisher import slick.jdbc.{ResultSetConcurrency, ResultSetType} @@ -60,6 +60,8 @@ class MetadataSlickDatabase(originalDatabaseConfig: Config) import dataAccess.driver.api._ import MetadataSlickDatabase._ + lazy val pgLargeObjectWriteRole: Option[String] = originalDatabaseConfig.as[Option[String]]("pgLargeObjectWriteRole") + override def existsMetadataEntries()(implicit ec: ExecutionContext): Future[Boolean] = { val action = dataAccess.metadataEntriesExists.result runTransaction(action) @@ -87,6 +89,8 @@ class MetadataSlickDatabase(originalDatabaseConfig: Config) rootWorkflowIdKey, labelMetadataKey) + val roleSet = pgLargeObjectWriteRole.map(role => sqlu"""SET ROLE TO "#$role"""") + // These entries also require a write to the summary queue. def writeSummarizable(): Future[Unit] = if (partitioned.summarizableMetadata.isEmpty) Future.successful(()) else { val batchesToWrite = partitioned.summarizableMetadata.grouped(insertBatchSize).toList @@ -94,13 +98,13 @@ class MetadataSlickDatabase(originalDatabaseConfig: Config) val insertMetadata = dataAccess.metadataEntryIdsAutoInc ++= batch insertMetadata.flatMap(ids => writeSummaryQueueEntries(ids)) } - runTransaction(DBIO.sequence(insertActions)).void + runTransaction(DBIO.sequence(roleSet ++ insertActions)).void } // Non-summarizable metadata that only needs to go to the metadata table can be written much more efficiently // than summarizable metadata. 
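Operationally, pgLargeObjectWriteRole is an optional key read from the metadata database's config block; when set, each metadata insert transaction is prefixed with SET ROLE so that large objects created for metadata values on PostgreSQL are owned by a shared role, matching the Liquibase changeset above that reassigns table ownership to ${sharedCromwellDbRole} when that role exists. A hypothetical configuration sketch; only the key name comes from this diff, and the surrounding nesting depends on how the metadata database is declared in your cromwell.conf:

database {
  # Assumed role name; it must exist in pg_roles and the Cromwell login user must be a member of it
  # for SET ROLE to succeed.
  pgLargeObjectWriteRole = "cromwell_shared_role"
  db {
    driver = "org.postgresql.Driver"
    # ...
  }
}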
def writeNonSummarizable(): Future[Unit] = if (partitioned.nonSummarizableMetadata.isEmpty) Future.successful(()) else { - val action = DBIO.sequence(partitioned.nonSummarizableMetadata.grouped(insertBatchSize).map(dataAccess.metadataEntries ++= _)) + val action = DBIO.sequence(roleSet ++ partitioned.nonSummarizableMetadata.grouped(insertBatchSize).map(dataAccess.metadataEntries ++= _)) runLobAction(action).void } @@ -515,4 +519,9 @@ class MetadataSlickDatabase(originalDatabaseConfig: Config) override def getMetadataTableSizeInformation()(implicit ec: ExecutionContext): Future[Option[InformationSchemaEntry]] = { runAction(dataAccess.metadataTableSizeInformation()) } + + override def getFailedJobsMetadataWithWorkflowId(rootWorkflowId: String)(implicit ec: ExecutionContext): Future[Vector[MetadataEntry]] = { + val isPostgres = databaseConfig.getValue("db.driver").toString.toLowerCase().contains("postgres") + runLobAction(dataAccess.failedJobsMetadataWithWorkflowId(rootWorkflowId, isPostgres)) + } } diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/MetadataEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/MetadataEntryComponent.scala index 32256c448fb..1c1225c195d 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/tables/MetadataEntryComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/MetadataEntryComponent.scala @@ -310,6 +310,99 @@ trait MetadataEntryComponent { }).headOption } + def failedJobsMetadataWithWorkflowId(rootWorkflowId: String, isPostgres: Boolean) = { + val getMetadataEntryResult = GetResult(r => { + MetadataEntry(r.<<, r.<<, r.<<, r.<<, r.<<, r.nextClobOption().map(clob => new SerialClob(clob)), r.<<, r.<<, r.<<) + }) + + def dbIdentifierWrapper(identifier: String, isPostgres: Boolean) = { + if(isPostgres) s"${'"'}$identifier${'"'}" else identifier + } + + def dbMetadataValueColCheckName(isPostgres: Boolean): String = { + if(isPostgres) "obj.data" else "METADATA_VALUE" + } + + def attemptAndIndexSelectStatement(callFqn: String, scatterIndex: String, retryAttempt: String, variablePrefix: String): String = { + s"SELECT ${callFqn}, MAX(COALESCE(${scatterIndex}, 0)) as ${variablePrefix}Scatter, MAX(COALESCE(${retryAttempt}, 0)) AS ${variablePrefix}Retry" + } + + def pgObjectInnerJoinStatement(isPostgres: Boolean, metadataValColName: String): String = { + if(isPostgres) s"INNER JOIN pg_largeobject obj ON me.${metadataValColName} = cast(obj.loid as text)" else "" + } + + def failedTaskGroupByClause(metadataValue: String, callFqn: String): String = { + return s"GROUP BY ${callFqn}, ${metadataValue}" + } + + val workflowUuid = dbIdentifierWrapper("WORKFLOW_EXECUTION_UUID", isPostgres) + val callFqn = dbIdentifierWrapper("CALL_FQN", isPostgres) + val scatterIndex = dbIdentifierWrapper("JOB_SCATTER_INDEX", isPostgres) + val retryAttempt = dbIdentifierWrapper("JOB_RETRY_ATTEMPT", isPostgres) + val metadataKey = dbIdentifierWrapper("METADATA_KEY", isPostgres) + val metadataValueType = dbIdentifierWrapper("METADATA_VALUE_TYPE", isPostgres) + val metadataTimestamp = dbIdentifierWrapper("METADATA_TIMESTAMP", isPostgres) + val metadataJournalId = dbIdentifierWrapper("METADATA_JOURNAL_ID", isPostgres) + val rootUuid = dbIdentifierWrapper("ROOT_WORKFLOW_EXECUTION_UUID", isPostgres) + val metadataValue = dbIdentifierWrapper("METADATA_VALUE", isPostgres) + val metadataEntry = dbIdentifierWrapper("METADATA_ENTRY", isPostgres) + val wmse = dbIdentifierWrapper("WORKFLOW_METADATA_SUMMARY_ENTRY", 
isPostgres) + val resultSetColumnNames = s"me.${workflowUuid}, me.${callFqn}, me.${scatterIndex}, me.${retryAttempt}, me.${metadataKey}, me.${metadataValue}, me.${metadataValueType}, me.${metadataTimestamp}, me.${metadataJournalId}" + + val query = + sql""" + SELECT #${resultSetColumnNames} + FROM #${metadataEntry} me + INNER JOIN ( + #${attemptAndIndexSelectStatement(callFqn, scatterIndex, retryAttempt, "failed")} + FROM #${metadataEntry} me + INNER JOIN #${wmse} wmse + ON wmse.#${workflowUuid} = me.#${workflowUuid} + #${pgObjectInnerJoinStatement(isPostgres, metadataValue)} + WHERE (wmse.#${rootUuid} = $rootWorkflowId OR wmse.#${workflowUuid} = $rootWorkflowId) + AND (me.#${metadataKey} in ('executionStatus', 'backendStatus') AND #${dbMetadataValueColCheckName(isPostgres)} = 'Failed') + #${failedTaskGroupByClause(dbMetadataValueColCheckName(isPostgres), callFqn)} + HAVING #${dbMetadataValueColCheckName(isPostgres)} = 'Failed' + ) AS failedCalls + ON me.#${callFqn} = failedCalls.#${callFqn} + INNER JOIN ( + #${attemptAndIndexSelectStatement(callFqn, scatterIndex, retryAttempt, "max")} + FROM #${metadataEntry} me + INNER JOIN #${wmse} wmse + ON wmse.#${workflowUuid} = me.#${workflowUuid} + WHERE (wmse.#${rootUuid} = $rootWorkflowId OR wmse.#${workflowUuid} = $rootWorkflowId) + AND #${callFqn} IS NOT NULL + GROUP BY #${callFqn} + ) maxCalls + ON me.#${callFqn} = maxCalls.#${callFqn} + LEFT JOIN ( + SELECT DISTINCT #${callFqn} + FROM #${metadataEntry} me + INNER JOIN #${wmse} wmse + ON wmse.#${workflowUuid} = me.#${workflowUuid} + WHERE (wmse.#${rootUuid} = $rootWorkflowId OR wmse.#${workflowUuid} = $rootWorkflowId) + AND me.#${metadataKey} = 'subWorkflowId' + GROUP BY #${callFqn} + ) AS avoidedCalls + ON me.#${callFqn} = avoidedCalls.#${callFqn} + INNER JOIN #${wmse} wmse + ON wmse.#${workflowUuid} = me.#${workflowUuid} + WHERE avoidedCalls.#${callFqn} IS NULL + AND COALESCE(me.#${scatterIndex}, 0) = maxCalls.maxScatter + AND COALESCE(me.#${retryAttempt}, 0) = maxCalls.maxRetry + AND failedCalls.failedScatter = maxCalls.maxScatter + AND failedCalls.failedRetry = maxCalls.maxRetry + GROUP BY #${resultSetColumnNames} + HAVING me.#${workflowUuid} IN ( + SELECT DISTINCT wmse.#${workflowUuid} + FROM #${wmse} wmse + WHERE wmse.#${rootUuid} = $rootWorkflowId OR wmse.#${workflowUuid} = $rootWorkflowId + ) + """ + + query.as(getMetadataEntryResult) + } + private[this] def metadataEntryHasMetadataKeysLike(metadataEntry: MetadataEntries, metadataKeysToFilterFor: List[String], metadataKeysToFilterOut: List[String]): Rep[Boolean] = { diff --git a/database/sql/src/main/scala/cromwell/database/sql/MetadataSqlDatabase.scala b/database/sql/src/main/scala/cromwell/database/sql/MetadataSqlDatabase.scala index 30534818f85..9139c819999 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/MetadataSqlDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/MetadataSqlDatabase.scala @@ -197,4 +197,6 @@ trait MetadataSqlDatabase extends SqlDatabase { def countWorkflowsLeftToDeleteThatEndedOnOrBeforeThresholdTimestamp(workflowEndTimestampThreshold: Timestamp)(implicit ec: ExecutionContext): Future[Int] def getMetadataTableSizeInformation()(implicit ec: ExecutionContext): Future[Option[InformationSchemaEntry]] + + def getFailedJobsMetadataWithWorkflowId(rootWorkflowId: String)(implicit ec: ExecutionContext): Future[Vector[MetadataEntry]] } diff --git a/dockerHashing/src/main/scala/cromwell/docker/DockerImageIdentifier.scala 
b/dockerHashing/src/main/scala/cromwell/docker/DockerImageIdentifier.scala index 9fbd173303b..a798f351f17 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/DockerImageIdentifier.scala +++ b/dockerHashing/src/main/scala/cromwell/docker/DockerImageIdentifier.scala @@ -1,5 +1,7 @@ package cromwell.docker +import cromwell.docker.registryv2.flows.azure.AzureContainerRegistry + import scala.util.{Failure, Success, Try} sealed trait DockerImageIdentifier { @@ -14,7 +16,14 @@ sealed trait DockerImageIdentifier { lazy val name = repository map { r => s"$r/$image" } getOrElse image // The name of the image with a repository prefix if a repository was specified, or with a default repository prefix of // "library" if no repository was specified. - lazy val nameWithDefaultRepository = repository.getOrElse("library") + s"/$image" + lazy val nameWithDefaultRepository = { + // In ACR, the repository is part of the registry domain instead of the path + // e.g. `terrabatchdev.azurecr.io` + if (host.exists(_.contains(AzureContainerRegistry.domain))) + image + else + repository.getOrElse("library") + s"/$image" + } lazy val hostAsString = host map { h => s"$h/" } getOrElse "" // The full name of this image, including a repository prefix only if a repository was explicitly specified. lazy val fullName = s"$hostAsString$name:$reference" diff --git a/dockerHashing/src/main/scala/cromwell/docker/DockerInfoActor.scala b/dockerHashing/src/main/scala/cromwell/docker/DockerInfoActor.scala index 40a4c74cb9b..3ebb8d98f39 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/DockerInfoActor.scala +++ b/dockerHashing/src/main/scala/cromwell/docker/DockerInfoActor.scala @@ -14,6 +14,7 @@ import cromwell.core.actor.StreamIntegration.{BackPressure, StreamContext} import cromwell.core.{Dispatcher, DockerConfiguration} import cromwell.docker.DockerInfoActor._ import cromwell.docker.registryv2.DockerRegistryV2Abstract +import cromwell.docker.registryv2.flows.azure.AzureContainerRegistry import cromwell.docker.registryv2.flows.dockerhub.DockerHubRegistry import cromwell.docker.registryv2.flows.google.GoogleRegistry import cromwell.docker.registryv2.flows.quay.QuayRegistry @@ -232,6 +233,7 @@ object DockerInfoActor { // To add a new registry, simply add it to that list List( + ("azure", { c: DockerRegistryConfig => new AzureContainerRegistry(c) }), ("dockerhub", { c: DockerRegistryConfig => new DockerHubRegistry(c) }), ("google", { c: DockerRegistryConfig => new GoogleRegistry(c) }), ("quay", { c: DockerRegistryConfig => new QuayRegistry(c) }) diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/DockerRegistryV2Abstract.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/DockerRegistryV2Abstract.scala index 1a5a02d95bd..bb25cb4bc3d 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/registryv2/DockerRegistryV2Abstract.scala +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/DockerRegistryV2Abstract.scala @@ -70,8 +70,9 @@ object DockerRegistryV2Abstract { } // Placeholder exceptions that can be carried through IO before being converted to a DockerInfoFailedResponse - private class Unauthorized() extends Exception - private class NotFound() extends Exception + private class Unauthorized(message: String) extends Exception(message) + private class NotFound(message: String) extends Exception(message) + private class UnknownError(message: String) extends Exception(message) } /** @@ -106,7 +107,7 @@ abstract class DockerRegistryV2Abstract(override val config: DockerRegistryConfi 
} // Execute a request. No retries because they're expected to already be handled by the client - private def executeRequest[A](request: IO[Request[IO]], handler: Response[IO] => IO[A])(implicit client: Client[IO]): IO[A] = { + protected def executeRequest[A](request: IO[Request[IO]], handler: Response[IO] => IO[A])(implicit client: Client[IO]): IO[A] = { request.flatMap(client.run(_).use[IO, A](handler)) } @@ -131,7 +132,10 @@ abstract class DockerRegistryV2Abstract(override val config: DockerRegistryConfi protected def getDockerResponse(token: Option[String], dockerInfoContext: DockerInfoContext)(implicit client: Client[IO]): IO[DockerInfoSuccessResponse] = { val requestDockerManifest = manifestRequest(token, dockerInfoContext.dockerImageID, AcceptDockerManifestV2Header) lazy val requestOCIManifest = manifestRequest(token, dockerInfoContext.dockerImageID, AcceptOCIIndexV1Header) - def tryOCIManifest(err: Throwable) = executeRequest(requestOCIManifest, handleManifestResponse(dockerInfoContext, token)) + def tryOCIManifest(err: Throwable) = { + logger.info(s"Manifest request failed for docker manifest V2, falling back to OCI manifest. Image: ${dockerInfoContext.dockerImageID}", err) + executeRequest(requestOCIManifest, handleManifestResponse(dockerInfoContext, token)) + } // Try to execute a request using the Docker Manifest format, and if that fails, try using the newer OCI manifest format executeRequest(requestDockerManifest, handleManifestResponse(dockerInfoContext, token)) .handleErrorWith(tryOCIManifest) @@ -184,7 +188,7 @@ abstract class DockerRegistryV2Abstract(override val config: DockerRegistryConfi /** * Builds the token request */ - private def buildTokenRequest(dockerInfoContext: DockerInfoContext): IO[Request[IO]] = { + protected def buildTokenRequest(dockerInfoContext: DockerInfoContext): IO[Request[IO]] = { val request = Method.GET( buildTokenRequestUri(dockerInfoContext.dockerImageID), buildTokenRequestHeaders(dockerInfoContext): _* @@ -216,7 +220,7 @@ abstract class DockerRegistryV2Abstract(override val config: DockerRegistryConfi * Request to get the manifest, using the auth token if provided */ private def manifestRequest(token: Option[String], imageId: DockerImageIdentifier, manifestHeader: Accept): IO[Request[IO]] = { - val authorizationHeader = token.map(t => Authorization(Credentials.Token(AuthScheme.Bearer, t))) + val authorizationHeader: Option[Authorization] = token.map(t => Authorization(Credentials.Token(AuthScheme.Bearer, t))) val request = Method.GET( buildManifestUri(imageId), List( @@ -282,9 +286,9 @@ abstract class DockerRegistryV2Abstract(override val config: DockerRegistryConfi private def getDigestFromResponse(response: Response[IO]): IO[DockerHashResult] = response match { case Status.Successful(r) => extractDigestFromHeaders(r.headers) - case Status.Unauthorized(_) => IO.raiseError(new Unauthorized) - case Status.NotFound(_) => IO.raiseError(new NotFound) - case failed => failed.as[String].flatMap(body => IO.raiseError(new Exception(s"Failed to get manifest: $body")) + case Status.Unauthorized(r) => r.as[String].flatMap(body => IO.raiseError(new Unauthorized(r.status.toString + " " + body))) + case Status.NotFound(r) => r.as[String].flatMap(body => IO.raiseError(new NotFound(r.status.toString + " " + body))) + case failed => failed.as[String].flatMap(body => IO.raiseError(new UnknownError(failed.status.toString + " " + body)) ) } diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/azure/AcrAccessToken.scala 
b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/azure/AcrAccessToken.scala new file mode 100644 index 00000000000..bf0841e2547 --- /dev/null +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/azure/AcrAccessToken.scala @@ -0,0 +1,3 @@ +package cromwell.docker.registryv2.flows.azure + +case class AcrAccessToken(access_token: String) diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/azure/AcrRefreshToken.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/azure/AcrRefreshToken.scala new file mode 100644 index 00000000000..aa6a6d17eb5 --- /dev/null +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/azure/AcrRefreshToken.scala @@ -0,0 +1,3 @@ +package cromwell.docker.registryv2.flows.azure + +case class AcrRefreshToken(refresh_token: String) diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/azure/AzureContainerRegistry.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/azure/AzureContainerRegistry.scala new file mode 100644 index 00000000000..46dfd116bc6 --- /dev/null +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/azure/AzureContainerRegistry.scala @@ -0,0 +1,149 @@ +package cromwell.docker.registryv2.flows.azure + +import cats.data.Validated.{Invalid, Valid} +import cats.effect.IO +import com.typesafe.scalalogging.LazyLogging +import common.validation.ErrorOr.ErrorOr +import cromwell.cloudsupport.azure.AzureCredentials +import cromwell.docker.DockerInfoActor.DockerInfoContext +import cromwell.docker.{DockerImageIdentifier, DockerRegistryConfig} +import cromwell.docker.registryv2.DockerRegistryV2Abstract +import org.http4s.{Header, Request, Response, Status} +import cromwell.docker.registryv2.flows.azure.AzureContainerRegistry.domain +import org.http4s.circe.jsonOf +import org.http4s.client.Client +import io.circe.generic.auto._ +import org.http4s._ + + +class AzureContainerRegistry(config: DockerRegistryConfig) extends DockerRegistryV2Abstract(config) with LazyLogging { + + /** + * (e.g registry-1.docker.io) + */ + override protected def registryHostName(dockerImageIdentifier: DockerImageIdentifier): String = + dockerImageIdentifier.host.getOrElse("") + + override def accepts(dockerImageIdentifier: DockerImageIdentifier): Boolean = + dockerImageIdentifier.hostAsString.contains(domain) + + override protected def authorizationServerHostName(dockerImageIdentifier: DockerImageIdentifier): String = + dockerImageIdentifier.host.getOrElse("") + + /** + * In Azure, service name does not exist at the registry level, it varies per repo, e.g. 
`terrabatchdev.azurecr.io` + */ + override def serviceName: Option[String] = + throw new Exception("ACR service name is host of user-defined registry, must derive from `DockerImageIdentifier`") + + /** + * Builds the list of headers for the token request + */ + override protected def buildTokenRequestHeaders(dockerInfoContext: DockerInfoContext): List[Header] = { + List(contentTypeHeader) + } + + private val contentTypeHeader: Header = { + import org.http4s.headers.`Content-Type` + import org.http4s.MediaType + + `Content-Type`(MediaType.application.`x-www-form-urlencoded`) + } + + private def getRefreshToken(authServerHostname: String, defaultAccessToken: String): IO[Request[IO]] = { + import org.http4s.Uri.{Authority, Scheme} + import org.http4s.client.dsl.io._ + import org.http4s._ + + val uri = Uri.apply( + scheme = Option(Scheme.https), + authority = Option(Authority(host = Uri.RegName(authServerHostname))), + path = "/oauth2/exchange", + query = Query.empty + ) + + org.http4s.Method.POST( + UrlForm( + "service" -> authServerHostname, + "access_token" -> defaultAccessToken, + "grant_type" -> "access_token" + ), + uri, + List(contentTypeHeader): _* + ) + } + + /* + Unlike other repositories, Azure reserves `GET /oauth2/token` for Basic Authentication [0] + In order to use Oauth we must `POST /oauth2/token` [1] + + [0] https://github.com/Azure/acr/blob/main/docs/Token-BasicAuth.md#using-the-token-api + [1] https://github.com/Azure/acr/blob/main/docs/AAD-OAuth.md#calling-post-oauth2token-to-get-an-acr-access-token + */ + private def getDockerAccessToken(hostname: String, repository: String, refreshToken: String): IO[Request[IO]] = { + import org.http4s.Uri.{Authority, Scheme} + import org.http4s.client.dsl.io._ + import org.http4s._ + + val uri = Uri.apply( + scheme = Option(Scheme.https), + authority = Option(Authority(host = Uri.RegName(hostname))), + path = "/oauth2/token", + query = Query.empty + ) + + org.http4s.Method.POST( + UrlForm( + // Tricky behavior - invalid `repository` values return a 200 with a valid-looking token. + // However, the token will cause 401s on all subsequent requests. + "scope" -> s"repository:$repository:pull", + "service" -> hostname, + "refresh_token" -> refreshToken, + "grant_type" -> "refresh_token" + ), + uri, + List(contentTypeHeader): _* + ) + } + + override protected def getToken(dockerInfoContext: DockerInfoContext)(implicit client: Client[IO]): IO[Option[String]] = { + val hostname = authorizationServerHostName(dockerInfoContext.dockerImageID) + val maybeAadAccessToken: ErrorOr[String] = AzureCredentials.getAccessToken(None) // AAD token suitable for get-refresh-token request + val repository = dockerInfoContext.dockerImageID.image // ACR uses what we think of image name, as the repository + + // Top-level flow: AAD access token -> refresh token -> ACR access token + maybeAadAccessToken match { + case Valid(accessToken) => + (for { + refreshToken <- executeRequest(getRefreshToken(hostname, accessToken), parseRefreshToken) + dockerToken <- executeRequest(getDockerAccessToken(hostname, repository, refreshToken), parseAccessToken) + } yield dockerToken).map(Option.apply) + case Invalid(errors) => + IO.raiseError( + new Exception(s"Could not obtain AAD token to exchange for ACR refresh token. 
Error(s): ${errors}") + ) + } + } + + implicit val refreshTokenDecoder: EntityDecoder[IO, AcrRefreshToken] = jsonOf[IO, AcrRefreshToken] + implicit val accessTokenDecoder: EntityDecoder[IO, AcrAccessToken] = jsonOf[IO, AcrAccessToken] + + private def parseRefreshToken(response: Response[IO]): IO[String] = response match { + case Status.Successful(r) => r.as[AcrRefreshToken].map(_.refresh_token) + case r => + r.as[String].flatMap(b => IO.raiseError(new Exception(s"Request failed with status ${r.status.code} and body $b"))) + } + + private def parseAccessToken(response: Response[IO]): IO[String] = response match { + case Status.Successful(r) => r.as[AcrAccessToken].map(_.access_token) + case r => + r.as[String].flatMap(b => IO.raiseError(new Exception(s"Request failed with status ${r.status.code} and body $b"))) + } + +} + +object AzureContainerRegistry { + + def domain: String = "azurecr.io" + +} diff --git a/dockerHashing/src/test/scala/cromwell/docker/DockerImageIdentifierSpec.scala b/dockerHashing/src/test/scala/cromwell/docker/DockerImageIdentifierSpec.scala index 00c738dbede..41353934fc6 100644 --- a/dockerHashing/src/test/scala/cromwell/docker/DockerImageIdentifierSpec.scala +++ b/dockerHashing/src/test/scala/cromwell/docker/DockerImageIdentifierSpec.scala @@ -18,6 +18,7 @@ class DockerImageIdentifierSpec extends AnyFlatSpec with CromwellTimeoutSpec wit ("broad/cromwell/submarine", None, Option("broad/cromwell"), "submarine", "latest"), ("gcr.io/google/slim", Option("gcr.io"), Option("google"), "slim", "latest"), ("us-central1-docker.pkg.dev/google/slim", Option("us-central1-docker.pkg.dev"), Option("google"), "slim", "latest"), + ("terrabatchdev.azurecr.io/postgres", Option("terrabatchdev.azurecr.io"), None, "postgres", "latest"), // With tags ("ubuntu:latest", None, None, "ubuntu", "latest"), ("ubuntu:1235-SNAP", None, None, "ubuntu", "1235-SNAP"), @@ -25,6 +26,7 @@ class DockerImageIdentifierSpec extends AnyFlatSpec with CromwellTimeoutSpec wit ("index.docker.io:9999/ubuntu:170904", Option("index.docker.io:9999"), None, "ubuntu", "170904"), ("localhost:5000/capture/transwf:170904", Option("localhost:5000"), Option("capture"), "transwf", "170904"), ("quay.io/biocontainers/platypus-variant:0.8.1.1--htslib1.5_0", Option("quay.io"), Option("biocontainers"), "platypus-variant", "0.8.1.1--htslib1.5_0"), + ("terrabatchdev.azurecr.io/postgres:latest", Option("terrabatchdev.azurecr.io"), None, "postgres", "latest"), // Very long tags with trailing spaces cause problems for the re engine ("someuser/someimage:supercalifragilisticexpialidociouseventhoughthesoundofitissomethingquiteatrociousifyousayitloudenoughyoullalwayssoundprecocious ", None, Some("someuser"), "someimage", "supercalifragilisticexpialidociouseventhoughthesoundofitissomethingquiteatrociousifyousayitloudenoughyoullalwayssoundprecocious") ) diff --git a/dockerHashing/src/test/scala/cromwell/docker/DockerInfoActorSpec.scala b/dockerHashing/src/test/scala/cromwell/docker/DockerInfoActorSpec.scala index e41be33f762..72baec70825 100644 --- a/dockerHashing/src/test/scala/cromwell/docker/DockerInfoActorSpec.scala +++ b/dockerHashing/src/test/scala/cromwell/docker/DockerInfoActorSpec.scala @@ -2,6 +2,7 @@ package cromwell.docker import cromwell.core.Tags.IntegrationTest import cromwell.docker.DockerInfoActor._ +import cromwell.docker.registryv2.flows.azure.AzureContainerRegistry import cromwell.docker.registryv2.flows.dockerhub.DockerHubRegistry import cromwell.docker.registryv2.flows.google.GoogleRegistry import 
cromwell.docker.registryv2.flows.quay.QuayRegistry @@ -18,7 +19,8 @@ class DockerInfoActorSpec extends DockerRegistrySpec with AnyFlatSpecLike with M override protected lazy val registryFlows = List( new DockerHubRegistry(DockerRegistryConfig.default), new GoogleRegistry(DockerRegistryConfig.default), - new QuayRegistry(DockerRegistryConfig.default) + new QuayRegistry(DockerRegistryConfig.default), + new AzureContainerRegistry(DockerRegistryConfig.default) ) it should "retrieve a public docker hash" taggedAs IntegrationTest in { @@ -50,6 +52,16 @@ class DockerInfoActorSpec extends DockerRegistrySpec with AnyFlatSpecLike with M hash should not be empty } } + + it should "retrieve a private docker hash on acr" taggedAs IntegrationTest in { + dockerActor ! makeRequest("terrabatchdev.azurecr.io/postgres:latest") + + expectMsgPF(15 second) { + case DockerInfoSuccessResponse(DockerInformation(DockerHashResult(alg, hash), _), _) => + alg shouldBe "sha256" + hash should not be empty + } + } it should "send image not found message back if the image does not exist" taggedAs IntegrationTest in { val notFound = makeRequest("ubuntu:nonexistingtag") diff --git a/docs/Releases.md b/docs/Releases.md index 1a446c90bc2..9699d7d44bc 100644 --- a/docs/Releases.md +++ b/docs/Releases.md @@ -6,8 +6,9 @@ You are strongly encouraged to use the latest release of Cromwell whenever possi Cromwell is distributed as a conda package on [conda-forge](https://conda-forge.org/). These instructions need to be followed for [installing the miniconda distribution](https://docs.conda.io/en/latest/miniconda.html) and [activating the conda-forge channel](https://conda-forge.org/#about). After this Cromwell can be installed in the -base environment with `conda install cromwell` or a separate environment for Cromwell can be created with -`conda create -n cromwell cromwell`. If you are using Cromwell for bioinformatics workflows, you might like to take +base environment with `conda install -c conda-forge cromwell` or a separate environment for Cromwell can be created with +`conda create -n cromwell cromwell` (be sure to activate the conda-forge channel first). +If you are using Cromwell for bioinformatics workflows, you might like to take a look at [bioconda](http://bioconda.github.io) as well. The conda installation of Cromwell comes with a wrapper that locates the jar for you and allows for running Cromwell or Womtool with a `cromwell run`, `womtool validate` or other command. Conda also installs the required Java dependency diff --git a/docs/api/RESTAPI.md b/docs/api/RESTAPI.md index ffb2af329de..64ffe19a3fd 100644 --- a/docs/api/RESTAPI.md +++ b/docs/api/RESTAPI.md @@ -1,5 +1,5 @@
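[Editorial note, not part of the patch] Recapping the `AzureContainerRegistry.getToken` flow introduced earlier in this diff: it chains three credentials, an AAD access token, an ACR refresh token from `POST /oauth2/exchange`, and finally a short-lived ACR access token from `POST /oauth2/token` scoped to a single repository, which is then sent as the Bearer token on the manifest request. A plain-Scala sketch of the request parameters involved, with no HTTP performed (angle-bracket values are placeholders):

```scala
// Editorial sketch: the form bodies mirror the UrlForm fields in the patch; nothing is sent over the wire.
object AcrTokenFlowSketch extends App {
  val registry = "terrabatchdev.azurecr.io" // user-defined ACR registry; also the OAuth "service"
  val repository = "postgres"               // what Cromwell treats as the image name

  // Step 1 (outside this sketch): obtain an AAD access token, e.g. via AzureCredentials.getAccessToken.
  val aadAccessToken = "<aad-access-token>"

  // Step 2: POST https://<registry>/oauth2/exchange -> ACR refresh token.
  val refreshTokenForm: Map[String, String] = Map(
    "service" -> registry,
    "access_token" -> aadAccessToken,
    "grant_type" -> "access_token"
  )

  // Step 3: POST https://<registry>/oauth2/token -> short-lived ACR access token,
  // scoped to pulling a single repository.
  def accessTokenForm(refreshToken: String): Map[String, String] = Map(
    "scope" -> s"repository:$repository:pull",
    "service" -> registry,
    "refresh_token" -> refreshToken,
    "grant_type" -> "refresh_token"
  )

  println(refreshTokenForm)
  println(accessTokenForm("<acr-refresh-token>"))
}
```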