Skip to content

Commit

Permalink
WX-927 JDR DRS resolution (#7555)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcovarr authored Sep 27, 2024
1 parent d81e805 commit 9853b52
Show file tree
Hide file tree
Showing 10 changed files with 233 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
version 1.0

# Runs the same pair of input files through three tasks and publishes what each
# task observed: container paths, md5 hashes and sizes from the localizing
# task, the paths seen when localization is optional, and the JSON contents as
# read by the engine.
workflow drs_usa_jdr {
  input {
    File file1
    File file2
  }

  # Files are localized into the container; reports path, md5 and size.
  call localize_jdr_drs_with_usa { input: file1 = file1, file2 = file2 }

  # Files are marked localization_optional; reports the path the task was handed.
  call skip_localize_jdr_drs_with_usa { input: file1 = file1, file2 = file2 }

  # Files are parsed with read_json() in the task's output section.
  call read_drs_with_usa { input: file1 = file1, file2 = file2 }

  output {
    # Results of the localizing task.
    String path1 = localize_jdr_drs_with_usa.path1
    String path2 = localize_jdr_drs_with_usa.path2
    String hash1 = localize_jdr_drs_with_usa.hash1
    String hash2 = localize_jdr_drs_with_usa.hash2
    Float size1 = localize_jdr_drs_with_usa.size1
    Float size2 = localize_jdr_drs_with_usa.size2

    # Paths reported by the task whose inputs are localization-optional.
    String cloud1 = skip_localize_jdr_drs_with_usa.path1
    String cloud2 = skip_localize_jdr_drs_with_usa.path2

    # Parsed JSON contents of each input.
    Map[String, String] map1 = read_drs_with_usa.map1
    Map[String, String] map2 = read_drs_with_usa.map2
  }
}

# Localizes both inputs and records, for each one, the path it was given inside
# the container, the md5 hex digest of its contents, and its size.
#
# Outputs:
#   path1/path2 - the path string the command saw for each input
#   hash1/hash2 - first 32 characters of `md5sum` output (the hex digest)
#   size1/size2 - result of size() on each input
task localize_jdr_drs_with_usa {
  input {
    File file1
    File file2
  }

  command <<<
    # Fail fast: without pipefail a failing md5sum is hidden by the exit status
    # of the trailing `cut`, leaving an empty hash file behind.
    set -euo pipefail

    # Paths are quoted so spaces or glob characters cannot split or expand them.
    echo "~{file1}" > path1
    echo "~{file2}" > path2
    md5sum "~{file1}" | cut -c1-32 > hash1
    md5sum "~{file2}" | cut -c1-32 > hash2
  >>>

  output {
    String path1 = read_string("path1")
    String path2 = read_string("path2")
    String hash1 = read_string("hash1")
    String hash2 = read_string("hash2")
    Float size1 = size(file1)
    Float size2 = size(file2)
  }

  runtime {
    docker: "ubuntu:latest"
    backend: "GCPBATCH-usa"
  }
}

# Records the path string each input resolves to when the input is declared
# localization-optional, i.e. the engine is allowed to hand the task a cloud
# path instead of downloading the file first.
#
# Outputs:
#   path1/path2 - the path string the command saw for each input
task skip_localize_jdr_drs_with_usa {
  input {
    File file1
    File file2
  }

  parameter_meta {
    file1: { localization_optional: true }
    file2: { localization_optional: true }
  }

  command <<<
    # Quoted so the value is written verbatim whether it is a container path or
    # a cloud URL, with no word splitting or glob expansion.
    echo "~{file1}" > path1
    echo "~{file2}" > path2
  >>>

  output {
    String path1 = read_string("path1")
    String path2 = read_string("path2")
  }

  runtime {
    docker: "ubuntu:latest"
    backend: "GCPBATCH-usa"
  }
}

# Parses each input as JSON via read_json() in the output section. The command
# itself never touches the files; it only prints a marker message.
#
# Outputs:
#   map1/map2 - each input's JSON object as a Map[String, String]
task read_drs_with_usa {
  input {
    File file1
    File file2
  }

  command <<<
    echo file is read by the engine
  >>>

  output {
    Map[String, String] map1 = read_json(file1)
    Map[String, String] map2 = read_json(file2)
  }

  runtime {
    docker: "ubuntu:latest"
    backend: "GCPBATCH-usa"
  }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: drs_usa_jdr
testFormat: WorkflowSuccess
backends: ["papi-v2-usa", "GCPBATCH_NEEDS_ALT"]
backends: ["papi-v2-usa", "GCPBATCH_ALT"]
tags: [ drs ]
skipDescribeEndpointValidation: true

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: drs_usa_jdr_preresolve
testFormat: WorkflowSuccess
backends: ["papi-v2-usa", GCPBATCH_NEEDS_ALT]
backends: ["papi-v2-usa", GCPBATCH_ALT]
tags: [ drs ]
skipDescribeEndpointValidation: true

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# CI test definition: runs the drs_usa_jdr workflow on the GCPBATCH-usa backend
# and expects it to succeed with the output values listed under `metadata`.
name: gcpbatch_drs_usa_jdr
testFormat: WorkflowSuccess
backends: ["GCPBATCH-usa"]
tags: [ drs ]
skipDescribeEndpointValidation: true

files {
workflow: drs_tests/gcpbatch_drs_usa_jdr.wdl
# options-dir defaults to an error message; the next line overrides it only when
# CROMWELL_BUILD_RESOURCES_DIRECTORY is set (HOCON optional substitution), so a
# missing variable surfaces as this message in the resolved options path.
options-dir: "Error: BA-6546 The environment variable CROMWELL_BUILD_RESOURCES_DIRECTORY must be set/export pointing to a valid path such as '${YOUR_CROMWELL_DIR}/target/ci/resources'"
options-dir: ${?CROMWELL_BUILD_RESOURCES_DIRECTORY}
# NOTE(review): this is the papi_v2_usa options file although the backend is
# GCPBATCH-usa; the sibling preresolve test uses a gcpbatch_-prefixed options
# file -- confirm this is intended.
options: ${files.options-dir}/papi_v2_usa.options.json
inputs: drs_tests/drs_usa_jdr.inputs
}

# Expected workflow metadata.
metadata {
workflowName: drs_usa_jdr
status: Succeeded

# Container paths reported by the localizing task.
"outputs.drs_usa_jdr.path1" =
"/mnt/disks/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs/hello_jade.json"
"outputs.drs_usa_jdr.path2" =
"/mnt/disks/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs2/hello_jade_2.json"
# md5 digests and sizes of the two localized files.
"outputs.drs_usa_jdr.hash1" = "faf12e94c25bef7df62e4a5eb62573f5"
"outputs.drs_usa_jdr.hash2" = "19e1b021628130fda04c79ee9a056b67"
"outputs.drs_usa_jdr.size1" = 18.0
"outputs.drs_usa_jdr.size2" = 38.0
# This JDR file has a gsUri that doesn't end in /fileName so it must be downloaded with the DRS localizer
"outputs.drs_usa_jdr.cloud1" =
"/mnt/disks/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs/hello_jade.json"
# This JDR file has a gsUri that can skip localization
"outputs.drs_usa_jdr.cloud2" =
"gs://broad-jade-dev-data-bucket/e1941fb9-6537-4e1a-b70d-34352a3a7817/ad783b60-aeba-4055-8f7b-194880f37259/hello_jade_2.json"
# Key/value pairs expected from read_json() on each input.
"outputs.drs_usa_jdr.map1.hello" = "jade"
"outputs.drs_usa_jdr.map2.hello" = "jade"
"outputs.drs_usa_jdr.map2.attempt" = "2"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# CI test definition: runs the drs_usa_jdr workflow on the GCPBATCH-usa backend
# with the preresolve options file and expects it to succeed with the output
# values listed under `metadata`.
name: gcpbatch_drs_usa_jdr_preresolve
testFormat: WorkflowSuccess
backends: ["GCPBATCH-usa"]
tags: [ drs ]
skipDescribeEndpointValidation: true

files {
workflow: drs_tests/gcpbatch_drs_usa_jdr.wdl
# options-dir defaults to an error message; the next line overrides it only when
# CROMWELL_BUILD_RESOURCES_DIRECTORY is set (HOCON optional substitution), so a
# missing variable surfaces as this message in the resolved options path.
options-dir: "Error: BA-6546 The environment variable CROMWELL_BUILD_RESOURCES_DIRECTORY must be set/export pointing to a valid path such as '${YOUR_CROMWELL_DIR}/target/ci/resources'"
options-dir: ${?CROMWELL_BUILD_RESOURCES_DIRECTORY}
options: ${files.options-dir}/gcpbatch_papi_v2_usa_preresolve.options.json
inputs: drs_tests/drs_usa_jdr.inputs
}

# Expected workflow metadata.
metadata {
workflowName: drs_usa_jdr
status: Succeeded

"outputs.drs_usa_jdr.path1" =
"/mnt/disks/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs/hello_jade.json"
# This JDR file has a gsUri that can be preresolved to a regular GCS file for improved localization performance.
# However this means that the file's container path is determined by the GCS localization logic and not the
# `localizationPath`-aware DRS localization logic. The GCS localization logic always uses a containerized version
# of the GCS path, which is what this expectation represents.
"outputs.drs_usa_jdr.path2" =
"/mnt/disks/cromwell_root/broad-jade-dev-data-bucket/e1941fb9-6537-4e1a-b70d-34352a3a7817/ad783b60-aeba-4055-8f7b-194880f37259/hello_jade_2.json"
# md5 digests and sizes of the two localized files.
"outputs.drs_usa_jdr.hash1" = "faf12e94c25bef7df62e4a5eb62573f5"
"outputs.drs_usa_jdr.hash2" = "19e1b021628130fda04c79ee9a056b67"
"outputs.drs_usa_jdr.size1" = 18.0
"outputs.drs_usa_jdr.size2" = 38.0
# This JDR file has a gsUri that doesn't end in /fileName so it must be downloaded with the DRS localizer
"outputs.drs_usa_jdr.cloud1" =
"/mnt/disks/cromwell_root/drs_localization_paths/CromwellSimpleWithFilerefs/hello_jade.json"
# This JDR file has a gsUri that can skip localization
"outputs.drs_usa_jdr.cloud2" =
"gs://broad-jade-dev-data-bucket/e1941fb9-6537-4e1a-b70d-34352a3a7817/ad783b60-aeba-4055-8f7b-194880f37259/hello_jade_2.json"
# Key/value pairs expected from read_json() on each input.
"outputs.drs_usa_jdr.map1.hello" = "jade"
"outputs.drs_usa_jdr.map2.hello" = "jade"
"outputs.drs_usa_jdr.map2.attempt" = "2"
}
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,9 @@ class DrsLocalizerMain(toResolveAndDownload: IO[List[UnresolvedDrsUrl]],
)
} else {
IO.raiseError(
new RuntimeException(s"Exhausted $resolutionRetries resolution retries to resolve $drsUrlToResolve.drsUrl", t)
new RuntimeException(s"Exhausted $resolutionRetries resolution retries to resolve ${drsUrlToResolve.drsUrl}",
t
)
)
}

Expand Down
6 changes: 6 additions & 0 deletions src/ci/resources/gcp_batch_application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ backend {
include "gcp_batch_provider_config.inc.conf"
}
}
# Backend entry "GCPBATCH-usa": uses the GCP Batch lifecycle actor factory and
# pulls its provider settings from gcp_batch_provider_config.inc.conf.
GCPBATCH-usa {
actor-factory = "cromwell.backend.google.batch.GcpBatchBackendLifecycleActorFactory"
config {
include "gcp_batch_provider_config.inc.conf"
}
}
GCPBATCHParallelCompositeUploads {
actor-factory = "cromwell.backend.google.batch.GcpBatchBackendLifecycleActorFactory"
config {
Expand Down
18 changes: 18 additions & 0 deletions src/ci/resources/gcp_batch_shared_application.inc.conf
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,24 @@ backend {
filesystems.http {}
}
}
# Shared defaults for the "GCPBATCH-usa" backend. The actor-factory value is a
# placeholder that the importing configuration is expected to replace.
GCPBATCH-usa {
actor-factory = "REPLACEME!"
config {
# When importing: Remember to also include an appropriate provider_config.inc.conf here.

# The "user_error: ..." strings below are placeholder values naming the workflow
# option that is expected to supply the real setting.
project = "user_error: google_project must be set in workflow options http://cromwell.readthedocs.io/en/develop/wf_options/Google/"
root = "user_error: jes_gcs_root must be set in workflow options http://cromwell.readthedocs.io/en/develop/wf_options/Google/"
batch.compute-service-account = "user_error: google_compute_service_account must be set in workflow options http://cromwell.readthedocs.io/en/develop/wf_options/Google/"
# Batch, DRS and GCS access all use the "user_service_account" auth.
batch.auth = "user_service_account"
filesystems.http {}
filesystems.drs.auth = "user_service_account"
filesystems.gcs.auth = "user_service_account"
filesystems.gcs.project = "user_error: user_service_account must be set in workflow options http://cromwell.readthedocs.io/en/develop/wf_options/Google/"

# Have the engine authenticate to docker.io. See BT-141 for more info.
include "dockerhub_provider_config_v1.inc.conf"
}
}
GCPBATCHParallelCompositeUploads {
actor-factory = "REPLACEME!"
config {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{{with $cromwellServiceAccount := secret (printf "secret/dsde/cromwell/common/cromwell-service-account.json")}}
{{/* Renders a JSON workflow-options document; the service-account email and key JSON are taken from the secret loaded above. */}}
{
"google_compute_service_account": "{{$cromwellServiceAccount.Data.client_email}}",
"user_service_account_json": "{{$cromwellServiceAccount.Data | toJSON | replaceAll "\\" "\\\\" | replaceAll "\"" "\\\"" }}",
"google_project": "broad-dsde-cromwell-dev",
"jes_gcs_root": "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci",
{{/* We're re-using the WDL with the same inputs in multiple tests, make sure we don't accidentally cache hit */}}
"read_from_cache": false,
{{/* Test pre-resolving DrsPath to GcsPath */}}
"override_preresolve_for_test": true
}
{{end}}
2 changes: 1 addition & 1 deletion src/ci/resources/papi_v2_usa_preresolve.options.json.ctmpl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"google_compute_service_account": "{{$cromwellServiceAccount.Data.client_email}}",
"user_service_account_json": "{{$cromwellServiceAccount.Data | toJSON | replaceAll "\\" "\\\\" | replaceAll "\"" "\\\"" }}",
"google_project": "broad-dsde-cromwell-dev",
"jes_gcs_root": "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci",
"gcp_batch_gcs_root": "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci",
{{/* We're re-using the WDL with the same inputs in multiple tests, make sure we don't accidentally cache hit */}}
"read_from_cache": false,
{{/* Test pre-resolving DrsPath to GcsPath */}}
Expand Down

0 comments on commit 9853b52

Please sign in to comment.