Skip to content

Instantly share code, notes, and snippets.

@swang373
Last active June 12, 2018 18:06
Show Gist options
  • Save swang373/f53dd2ab26827a279dc46e5d3ca851d3 to your computer and use it in GitHub Desktop.
Save swang373/f53dd2ab26827a279dc46e5d3ca851d3 to your computer and use it in GitHub Desktop.
Finding the arguments for AnalysisTools CMSConnect jobs that require resubmission
import glob
import os
# Sample names to check. Each name is substituted into SEARCH_PATTERN and
# into the per-sample submit file path 'submitfiles/<sample>.submit'.
SAMPLES = [
    'DYToLL_madgraph',
    'ZH125_ZNuNu_powheg',
]

# Glob template for a sample's output ROOT files; '{0}' is filled in with the
# sample name. The path looks like a placeholder -- point it at the real
# output directory before running.
SEARCH_PATTERN = '/path/to/your/output/dir/{0}/*.root'
def parse_submit_file(lines):
    """Parse the lines of a CMSConnect/HTCondor submit file in one pass.

    Args:
        lines: Iterable of text lines (an open file object works directly).

    Returns:
        A ``(n_jobs, job_arguments)`` tuple. ``n_jobs`` is the number of
        lines starting with ``queue``. ``job_arguments`` lists the value of
        every line starting with ``arguments``, in file order, so that the
        list index corresponds to the job id.

    NOTE(review): every ``queue`` line is counted as exactly one job; a
    ``queue N`` statement would be under-counted -- confirm the submit files
    never use that form.
    """
    n_jobs = 0
    job_arguments = []
    for line in lines:
        if line.startswith('queue'):
            n_jobs += 1
        elif line.startswith('arguments'):
            job_arguments.append(line.replace('arguments = ', '').strip())
    return n_jobs, job_arguments


def extract_job_ids(paths):
    """Return the set of job ids encoded in output file names.

    The job id is taken to be the integer after the last underscore of the
    file name without its extension, e.g. ``output_12.root`` -> ``12``.

    WARNING: This assumes the output file number for a sample was assigned
    using the 0-indexed $(Process) macro provided by HTCondor.

    Args:
        paths: Iterable of output file paths.

    Returns:
        A set of integer job ids. Files whose suffix is not an integer are
        reported on stderr and skipped instead of aborting the whole run.
    """
    import sys

    job_ids = set()
    for path in paths:
        stem = os.path.splitext(os.path.basename(path))[0]
        suffix = stem.split('_')[-1]
        try:
            job_ids.add(int(suffix))
        except ValueError:
            # A stray file (e.g. a merged output) is not a job output file.
            sys.stderr.write(
                '# Skipping {0}: no numeric job id in file name\n'.format(path)
            )
    return job_ids


def find_resubmit_arguments(n_jobs, job_arguments, finished_job_ids):
    """Return the arguments of every job that produced no output file.

    Args:
        n_jobs: Total number of queued jobs; job ids are ``0 .. n_jobs - 1``.
        job_arguments: Argument strings indexed by job id.
        finished_job_ids: Iterable of job ids that have an output file.

    Returns:
        A list of argument strings, in ascending job id order, with every
        ``$(Process)`` occurrence replaced by the job id.

    Raises:
        IndexError: If a missing job id has no entry in ``job_arguments``.
    """
    # Sort so the resubmit list is deterministic; set iteration order is not.
    missing_job_ids = sorted(set(range(n_jobs)) - set(finished_job_ids))
    resubmit_arguments = []
    for job_id in missing_job_ids:
        if job_id >= len(job_arguments):
            raise IndexError(
                'job {0} has no matching "arguments" line '
                '({1} found for {2} queued jobs)'.format(
                    job_id, len(job_arguments), n_jobs
                )
            )
        resubmit_arguments.append(
            job_arguments[job_id].replace('$(Process)', str(job_id))
        )
    return resubmit_arguments


def main():
    """Print, per sample, the arguments of the jobs that need resubmission.

    This assumes you've used scp to copy over the submit files from
    CMSConnect into a local directory called submitfiles. The parsing logic
    targets CMSConnect submit files specifically, but feel free to modify it
    to suit your needs.
    """
    for sample in SAMPLES:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('# Checking {0}'.format(sample))
        with open('submitfiles/{0}.submit'.format(sample)) as f:
            n_jobs, job_arguments = parse_submit_file(f)
        finished_job_ids = extract_job_ids(
            glob.glob(SEARCH_PATTERN.format(sample))
        )
        # These lines are what needs to go into a resubmit file.
        for args in find_resubmit_arguments(n_jobs, job_arguments, finished_job_ids):
            print(args)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment