Skip to content

Instantly share code, notes, and snippets.

@swang373
Last active June 12, 2018 17:59
Show Gist options
  • Save swang373/1b71ae6c4800ce7489127f35ccfcbb8d to your computer and use it in GitHub Desktop.
Save swang373/1b71ae6c4800ce7489127f35ccfcbb8d to your computer and use it in GitHub Desktop.
Check the integrity of files in an AnalysisTools output directory
import glob
import ROOT
import concurrent.futures
def check_file(path):
    """Open a ROOT file and count the entries of its 'Events' tree.

    Args:
        path: Location of the ROOT file, handed unchanged to
            ``ROOT.TFile.Open``.

    Returns:
        A ``(path, n_entries)`` tuple. ``n_entries`` is the result of
        ``GetEntriesFast()`` on the object stored under the key 'Events',
        or ``None`` when the file cannot be opened, is flagged as
        recovered or zombie, or the 'Events' object cannot be read.
    """
    try:
        f = ROOT.TFile.Open(path)
    except Exception:
        # NOTE(review): some PyROOT releases appear to raise (e.g. OSError)
        # on a failed open instead of returning a null handle. Treat both
        # outcomes the same so one bad file cannot abort the whole scan.
        return path, None
    # The file is somehow unreadable.
    if not f:
        return path, None
    try:
        # The file is incomplete or corrupt.
        if f.TestBit(ROOT.TFile.kRecovered) or f.IsZombie():
            return path, None
        events = f.Get('Events')
        return path, events.GetEntriesFast()
    except Exception:
        # Best effort: a missing or unreadable 'Events' object is reported
        # as a bad file rather than propagated to the caller.
        return path, None
    finally:
        # Runs on every exit path once the file was opened, including the
        # early "recovered/zombie" return, so no handle is leaked.
        f.Close()
if __name__ == '__main__':
    # Output directory globbing pattern.
    SEARCH_PATTERN = '/path/to/the/output/dir/{0}/*.root'
    # The sample names.
    SAMPLES = ['ZH', 'DYToLL', 'QCDHT300']
    for sample in SAMPLES:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Checking {0}'.format(sample))
        paths = glob.glob(SEARCH_PATTERN.format(sample))
        bad_paths = []
        # Check the files of one sample concurrently; the pool is drained
        # before the summary below is printed.
        with concurrent.futures.ThreadPoolExecutor(42) as executor:
            futures = [executor.submit(check_file, path) for path in paths]
            for future in concurrent.futures.as_completed(futures):
                path, n_entries = future.result()
                if n_entries is None:
                    # check_file could not read the file or its tree.
                    bad_paths.append(path)
                elif n_entries == 0:
                    print('{0} has a tree with no events!'.format(path))
        print('There are {0!s} good files out of a total of {1!s}'.format(len(futures) - len(bad_paths), len(futures)))
        # Only report corrupt files when there are any; completion order is
        # arbitrary, so sort for a reproducible listing.
        if bad_paths:
            print('The following files are likely corrupt: {0}'.format('\n'.join(sorted(bad_paths))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment