Skip to content

Commit

Permalink
do not download user sandbox in TW (#8745)
Browse files Browse the repository at this point in the history
  • Loading branch information
belforte authored Oct 18, 2024
1 parent 89d0ca2 commit 8170e49
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 16 deletions.
15 changes: 9 additions & 6 deletions src/python/CRABInterface/RESTCache.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,17 @@ def get(self, subresource, objecttype, taskname, username, tarballname, clientme
if not objecttype:
raise MissingParameter("objecttype is missing")
if objecttype == 'sandbox':
# sandboxes can be reused across tasks, so are user, but not task, specific
# object path in the S3 container is: username/sandboxes/tarballname
# username indicates the user who owns the sandbox which may be different from
# the user who is making the REST call (authenticatedUserName).
# We rely on client to provide correct username.
# But when uploading from CRABClient, username is not known to the caller code
# for historical reasons so we settle for the authenticatedUserName.
# Having a special condition here is ugly but easier than fixing CRABClient
if not tarballname:
raise MissingParameter("tarballname is missing")
if subresource == 'upload' or (subresource == 'download' and clientmethod == 'head_object'):
ownerName = authenticatedUserName
else:
ownerName = username
if not ownerName:
raise MissingParameter("username is missing")
ownerName = username if username else authenticatedUserName
# sandbox goes in bucket/username/sandboxes/
objectPath = ownerName + '/sandboxes/' + tarballname
else:
Expand Down
27 changes: 17 additions & 10 deletions src/python/TaskWorker/Actions/DagmanCreator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ast import literal_eval

from ServerUtilities import MAX_DISK_SPACE, MAX_IDLE_JOBS, MAX_POST_JOBS, TASKLIFETIME
from ServerUtilities import getLock, downloadFromS3
from ServerUtilities import getLock, downloadFromS3, checkS3Object, uploadToS3

import TaskWorker.DataObjects.Result
from TaskWorker.Actions.TaskAction import TaskAction
Expand Down Expand Up @@ -544,8 +544,7 @@ def makeJobSubmit(self, task):
info['additional_environment_options'] += ' CRAB_TASKMANAGER_TARBALL=local'
else:
raise TaskWorkerException("Cannot find TaskManagerRun.tar.gz inside the cwd: %s" % os.getcwd())
if os.path.exists("sandbox.tar.gz"):
info['additional_input_file'] += ", sandbox.tar.gz"
info['additional_input_file'] += ", sandbox.tar.gz" # it will be present on SPOOL_DIR after dab_bootstrap
info['additional_input_file'] += ", run_and_lumis.tar.gz"
info['additional_input_file'] += ", input_files.tar.gz"
info['additional_input_file'] += ", submit_env.sh"
Expand Down Expand Up @@ -728,6 +727,11 @@ def prepareLocal(self, dagSpecs, info, kw, inputFiles, subdags):
tf.add(ifname)
finally:
tf.close()
# also upload InputFiles.tar.gz to s3
task = kw['task']['tm_taskname']
uploadToS3(crabserver=self.crabserver, filepath='InputFiles.tar.gz',
objecttype='runtimefiles', taskname=task,
logger=self.logger)

def createSubdag(self, splitterResult, **kwargs):

Expand Down Expand Up @@ -1124,6 +1128,7 @@ def extractMonitorFiles(self, inputFiles, **kw):
Modifies inputFiles list by appending the debug folder if the extraction succeeds.
"""

tarFileName = 'sandbox.tar.gz' if not os.path.isfile('debug_files.tar.gz') else 'debug_files.tar.gz'
try:
debugTar = tarfile.open(tarFileName)
Expand Down Expand Up @@ -1195,20 +1200,24 @@ def executeInternal(self, *args, **kw):
# Bootstrap the ISB if we are running in the TW
if self.crabserver:
username = kw['task']['tm_username']
taskname = kw['task']['tm_taskname']
sandboxName = kw['task']['tm_user_sandbox']
dbgFilesName = kw['task']['tm_debug_files']
self.logger.debug(f"Checking if sandbox file is available: {sandboxName}")
try:
downloadFromS3(crabserver=self.crabserver, objecttype='sandbox', username=username,
tarballname=sandboxName, filepath=sandboxTarBall, logger=self.logger)
checkS3Object(crabserver=self.crabserver, objecttype='sandbox', taskname=taskname,
username=username, tarballname=sandboxName, logger=self.logger)
kw['task']['tm_user_sandbox'] = sandboxTarBall
except Exception as ex:
raise TaskWorkerException("The CRAB server backend could not download the input sandbox with your code " + \
raise TaskWorkerException("The CRAB server backend could not find the input sandbox with your code " + \
"from S3.\nThis could be a temporary glitch; please try to submit a new task later " + \
"(resubmit will not work) and contact the experts if the error persists.\nError reason: %s" % str(ex)) from ex
"(resubmit will not work) and contact the experts if the error persists." + \
f"\nError reason: {ex}") from ex
# still need this download, until we change AdjustSites.py to do it there
try:
downloadFromS3(crabserver=self.crabserver, objecttype='sandbox', username=username,
tarballname=dbgFilesName, filepath=debugTarBall, logger=self.logger)
except Exception as ex: # pylint: disable=broad-except
except Exception as ex: # pylint: disable=broad-except
self.logger.exception(ex)

# Bootstrap the runtime if it is available.
Expand All @@ -1227,8 +1236,6 @@ def executeInternal(self, *args, **kw):

self.extractMonitorFiles(inputFiles, **kw)

if kw['task'].get('tm_user_sandbox') == 'sandbox.tar.gz':
inputFiles.append('sandbox.tar.gz')
if os.path.exists("CMSRunAnalysis.tar.gz"):
inputFiles.append("CMSRunAnalysis.tar.gz")
if os.path.exists("TaskManagerRun.tar.gz"):
Expand Down

0 comments on commit 8170e49

Please sign in to comment.